{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.981132075471699,
  "global_step": 390,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "learning_rate": 1e-08,
      "loss": 8.1684,
      "step": 10
    },
    {
      "epoch": 0.25,
      "eval_loss": 8.85881519317627,
      "eval_runtime": 219.4921,
      "eval_samples_per_second": 19.481,
      "eval_steps_per_second": 9.741,
      "eval_wer": 1.0125384665750017,
      "step": 10
    },
    {
      "epoch": 0.5,
      "learning_rate": 9.736842105263159e-09,
      "loss": 8.1428,
      "step": 20
    },
    {
      "epoch": 0.5,
      "eval_loss": 8.856884956359863,
      "eval_runtime": 224.2813,
      "eval_samples_per_second": 19.065,
      "eval_steps_per_second": 9.533,
      "eval_wer": 1.012505729064362,
      "step": 20
    },
    {
      "epoch": 0.75,
      "learning_rate": 9.473684210526316e-09,
      "loss": 8.1333,
      "step": 30
    },
    {
      "epoch": 0.75,
      "eval_loss": 8.855155944824219,
      "eval_runtime": 226.0385,
      "eval_samples_per_second": 18.917,
      "eval_steps_per_second": 9.459,
      "eval_wer": 1.0124075165324429,
      "step": 30
    },
    {
      "epoch": 1.03,
      "learning_rate": 9.210526315789473e-09,
      "loss": 8.7873,
      "step": 40
    },
    {
      "epoch": 1.03,
      "eval_loss": 8.85318660736084,
      "eval_runtime": 220.4335,
      "eval_samples_per_second": 19.398,
      "eval_steps_per_second": 9.699,
      "eval_wer": 1.0124075165324429,
      "step": 40
    },
    {
      "epoch": 1.28,
      "learning_rate": 8.947368421052632e-09,
      "loss": 8.1298,
      "step": 50
    },
    {
      "epoch": 1.28,
      "eval_loss": 8.851649284362793,
      "eval_runtime": 224.7965,
      "eval_samples_per_second": 19.022,
      "eval_steps_per_second": 9.511,
      "eval_wer": 1.0124075165324429,
      "step": 50
    },
    {
      "epoch": 1.53,
      "learning_rate": 8.68421052631579e-09,
      "loss": 8.1445,
      "step": 60
    },
    {
      "epoch": 1.53,
      "eval_loss": 8.84989070892334,
      "eval_runtime": 220.475,
      "eval_samples_per_second": 19.394,
      "eval_steps_per_second": 9.697,
      "eval_wer": 1.0123420415111635,
      "step": 60
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.421052631578947e-09,
      "loss": 8.1635,
      "step": 70
    },
    {
      "epoch": 1.78,
      "eval_loss": 8.8483304977417,
      "eval_runtime": 222.7151,
      "eval_samples_per_second": 19.199,
      "eval_steps_per_second": 9.6,
      "eval_wer": 1.0124075165324429,
      "step": 70
    },
    {
      "epoch": 2.05,
      "learning_rate": 8.157894736842106e-09,
      "loss": 8.7587,
      "step": 80
    },
    {
      "epoch": 2.05,
      "eval_loss": 8.846796989440918,
      "eval_runtime": 221.1122,
      "eval_samples_per_second": 19.339,
      "eval_steps_per_second": 9.669,
      "eval_wer": 1.0125384665750017,
      "step": 80
    },
    {
      "epoch": 2.3,
      "learning_rate": 7.894736842105263e-09,
      "loss": 8.1424,
      "step": 90
    },
    {
      "epoch": 2.3,
      "eval_loss": 8.845438957214355,
      "eval_runtime": 219.7737,
      "eval_samples_per_second": 19.456,
      "eval_steps_per_second": 9.728,
      "eval_wer": 1.0124075165324429,
      "step": 90
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.631578947368422e-09,
      "loss": 8.1318,
      "step": 100
    },
    {
      "epoch": 2.55,
      "eval_loss": 8.844048500061035,
      "eval_runtime": 222.4326,
      "eval_samples_per_second": 19.224,
      "eval_steps_per_second": 9.612,
      "eval_wer": 1.0124402540430826,
      "step": 100
    },
    {
      "epoch": 2.81,
      "learning_rate": 7.368421052631579e-09,
      "loss": 8.1469,
      "step": 110
    },
    {
      "epoch": 2.81,
      "eval_loss": 8.842790603637695,
      "eval_runtime": 224.1044,
      "eval_samples_per_second": 19.08,
      "eval_steps_per_second": 9.54,
      "eval_wer": 1.012505729064362,
      "step": 110
    },
    {
      "epoch": 3.08,
      "learning_rate": 7.105263157894737e-09,
      "loss": 8.7602,
      "step": 120
    },
    {
      "epoch": 3.08,
      "eval_loss": 8.841601371765137,
      "eval_runtime": 222.7003,
      "eval_samples_per_second": 19.201,
      "eval_steps_per_second": 9.6,
      "eval_wer": 1.0124729915537223,
      "step": 120
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.842105263157895e-09,
      "loss": 8.1584,
      "step": 130
    },
    {
      "epoch": 3.33,
      "eval_loss": 8.840473175048828,
      "eval_runtime": 220.9442,
      "eval_samples_per_second": 19.353,
      "eval_steps_per_second": 9.677,
      "eval_wer": 1.0125712040856414,
      "step": 130
    },
    {
      "epoch": 3.58,
      "learning_rate": 6.578947368421054e-09,
      "loss": 8.142,
      "step": 140
    },
    {
      "epoch": 3.58,
      "eval_loss": 8.839417457580566,
      "eval_runtime": 223.4762,
      "eval_samples_per_second": 19.134,
      "eval_steps_per_second": 9.567,
      "eval_wer": 1.0125712040856414,
      "step": 140
    },
    {
      "epoch": 3.83,
      "learning_rate": 6.31578947368421e-09,
      "loss": 8.1285,
      "step": 150
    },
    {
      "epoch": 3.83,
      "eval_loss": 8.838351249694824,
      "eval_runtime": 225.8637,
      "eval_samples_per_second": 18.932,
      "eval_steps_per_second": 9.466,
      "eval_wer": 1.0124075165324429,
      "step": 150
    },
    {
      "epoch": 4.1,
      "learning_rate": 6.052631578947369e-09,
      "loss": 8.7756,
      "step": 160
    },
    {
      "epoch": 4.1,
      "eval_loss": 8.837142944335938,
      "eval_runtime": 224.8527,
      "eval_samples_per_second": 19.017,
      "eval_steps_per_second": 9.508,
      "eval_wer": 1.0124075165324429,
      "step": 160
    },
    {
      "epoch": 4.35,
      "learning_rate": 5.789473684210527e-09,
      "loss": 8.0991,
      "step": 170
    },
    {
      "epoch": 4.35,
      "eval_loss": 8.83634090423584,
      "eval_runtime": 220.3123,
      "eval_samples_per_second": 19.409,
      "eval_steps_per_second": 9.704,
      "eval_wer": 1.0124729915537223,
      "step": 170
    },
    {
      "epoch": 4.6,
      "learning_rate": 5.526315789473685e-09,
      "loss": 8.1442,
      "step": 180
    },
    {
      "epoch": 4.6,
      "eval_loss": 8.83536434173584,
      "eval_runtime": 224.5432,
      "eval_samples_per_second": 19.043,
      "eval_steps_per_second": 9.522,
      "eval_wer": 1.0124402540430826,
      "step": 180
    },
    {
      "epoch": 4.86,
      "learning_rate": 5.263157894736842e-09,
      "loss": 8.1294,
      "step": 190
    },
    {
      "epoch": 4.86,
      "eval_loss": 8.834578514099121,
      "eval_runtime": 220.0402,
      "eval_samples_per_second": 19.433,
      "eval_steps_per_second": 9.716,
      "eval_wer": 1.0124075165324429,
      "step": 190
    },
    {
      "epoch": 5.13,
      "learning_rate": 5e-09,
      "loss": 8.7276,
      "step": 200
    },
    {
      "epoch": 5.13,
      "eval_loss": 8.833772659301758,
      "eval_runtime": 224.0823,
      "eval_samples_per_second": 19.082,
      "eval_steps_per_second": 9.541,
      "eval_wer": 1.0125384665750017,
      "step": 200
    },
    {
      "epoch": 5.38,
      "learning_rate": 4.736842105263158e-09,
      "loss": 8.1439,
      "step": 210
    },
    {
      "epoch": 5.38,
      "eval_loss": 8.832892417907715,
      "eval_runtime": 220.6908,
      "eval_samples_per_second": 19.376,
      "eval_steps_per_second": 9.688,
      "eval_wer": 1.0124402540430826,
      "step": 210
    },
    {
      "epoch": 5.63,
      "learning_rate": 4.473684210526316e-09,
      "loss": 8.1115,
      "step": 220
    },
    {
      "epoch": 5.63,
      "eval_loss": 8.832157135009766,
      "eval_runtime": 221.8649,
      "eval_samples_per_second": 19.273,
      "eval_steps_per_second": 9.636,
      "eval_wer": 1.0124402540430826,
      "step": 220
    },
    {
      "epoch": 5.88,
      "learning_rate": 4.210526315789473e-09,
      "loss": 8.1501,
      "step": 230
    },
    {
      "epoch": 5.88,
      "eval_loss": 8.831602096557617,
      "eval_runtime": 223.55,
      "eval_samples_per_second": 19.128,
      "eval_steps_per_second": 9.564,
      "eval_wer": 1.0125384665750017,
      "step": 230
    },
    {
      "epoch": 6.15,
      "learning_rate": 3.947368421052631e-09,
      "loss": 8.7143,
      "step": 240
    },
    {
      "epoch": 6.15,
      "eval_loss": 8.830825805664062,
      "eval_runtime": 224.3279,
      "eval_samples_per_second": 19.061,
      "eval_steps_per_second": 9.531,
      "eval_wer": 1.0124075165324429,
      "step": 240
    },
    {
      "epoch": 6.4,
      "learning_rate": 3.6842105263157894e-09,
      "loss": 8.143,
      "step": 250
    },
    {
      "epoch": 6.4,
      "eval_loss": 8.830228805541992,
      "eval_runtime": 225.4738,
      "eval_samples_per_second": 18.965,
      "eval_steps_per_second": 9.482,
      "eval_wer": 1.0123747790218032,
      "step": 250
    },
    {
      "epoch": 6.65,
      "learning_rate": 3.4210526315789474e-09,
      "loss": 8.1528,
      "step": 260
    },
    {
      "epoch": 6.65,
      "eval_loss": 8.829960823059082,
      "eval_runtime": 222.8802,
      "eval_samples_per_second": 19.185,
      "eval_steps_per_second": 9.593,
      "eval_wer": 1.0124729915537223,
      "step": 260
    },
    {
      "epoch": 6.91,
      "learning_rate": 3.1842105263157894e-09,
      "loss": 8.1293,
      "step": 270
    },
    {
      "epoch": 6.91,
      "eval_loss": 8.829716682434082,
      "eval_runtime": 223.7307,
      "eval_samples_per_second": 19.112,
      "eval_steps_per_second": 9.556,
      "eval_wer": 1.0124075165324429,
      "step": 270
    },
    {
      "epoch": 7.18,
      "learning_rate": 2.9210526315789475e-09,
      "loss": 8.7519,
      "step": 280
    },
    {
      "epoch": 7.18,
      "eval_loss": 8.829301834106445,
      "eval_runtime": 223.0404,
      "eval_samples_per_second": 19.171,
      "eval_steps_per_second": 9.586,
      "eval_wer": 1.0124729915537223,
      "step": 280
    },
    {
      "epoch": 7.43,
      "learning_rate": 2.657894736842105e-09,
      "loss": 8.1153,
      "step": 290
    },
    {
      "epoch": 7.43,
      "eval_loss": 8.828947067260742,
      "eval_runtime": 219.8129,
      "eval_samples_per_second": 19.453,
      "eval_steps_per_second": 9.726,
      "eval_wer": 1.0124075165324429,
      "step": 290
    },
    {
      "epoch": 7.68,
      "learning_rate": 2.394736842105263e-09,
      "loss": 8.1292,
      "step": 300
    },
    {
      "epoch": 7.68,
      "eval_loss": 8.828753471374512,
      "eval_runtime": 222.9513,
      "eval_samples_per_second": 19.179,
      "eval_steps_per_second": 9.59,
      "eval_wer": 1.0124402540430826,
      "step": 300
    },
    {
      "epoch": 7.93,
      "learning_rate": 2.131578947368421e-09,
      "loss": 8.0904,
      "step": 310
    },
    {
      "epoch": 7.93,
      "eval_loss": 8.828449249267578,
      "eval_runtime": 224.0134,
      "eval_samples_per_second": 19.088,
      "eval_steps_per_second": 9.544,
      "eval_wer": 1.0124075165324429,
      "step": 310
    },
    {
      "epoch": 8.2,
      "learning_rate": 1.868421052631579e-09,
      "loss": 8.7425,
      "step": 320
    },
    {
      "epoch": 8.2,
      "eval_loss": 8.828290939331055,
      "eval_runtime": 219.9475,
      "eval_samples_per_second": 19.441,
      "eval_steps_per_second": 9.721,
      "eval_wer": 1.0125384665750017,
      "step": 320
    },
    {
      "epoch": 8.45,
      "learning_rate": 1.605263157894737e-09,
      "loss": 8.0963,
      "step": 330
    },
    {
      "epoch": 8.45,
      "eval_loss": 8.828081130981445,
      "eval_runtime": 222.5212,
      "eval_samples_per_second": 19.216,
      "eval_steps_per_second": 9.608,
      "eval_wer": 1.0124075165324429,
      "step": 330
    },
    {
      "epoch": 8.7,
      "learning_rate": 1.3421052631578948e-09,
      "loss": 8.1112,
      "step": 340
    },
    {
      "epoch": 8.7,
      "eval_loss": 8.828051567077637,
      "eval_runtime": 222.696,
      "eval_samples_per_second": 19.201,
      "eval_steps_per_second": 9.601,
      "eval_wer": 1.0124402540430826,
      "step": 340
    },
    {
      "epoch": 8.96,
      "learning_rate": 1.0789473684210528e-09,
      "loss": 8.124,
      "step": 350
    },
    {
      "epoch": 8.96,
      "eval_loss": 8.828123092651367,
      "eval_runtime": 222.2122,
      "eval_samples_per_second": 19.243,
      "eval_steps_per_second": 9.621,
      "eval_wer": 1.012505729064362,
      "step": 350
    },
    {
      "epoch": 9.23,
      "learning_rate": 8.157894736842106e-10,
      "loss": 8.7327,
      "step": 360
    },
    {
      "epoch": 9.23,
      "eval_loss": 8.827865600585938,
      "eval_runtime": 222.5189,
      "eval_samples_per_second": 19.216,
      "eval_steps_per_second": 9.608,
      "eval_wer": 1.0123420415111635,
      "step": 360
    },
    {
      "epoch": 9.48,
      "learning_rate": 5.526315789473684e-10,
      "loss": 8.1261,
      "step": 370
    },
    {
      "epoch": 9.48,
      "eval_loss": 8.827857971191406,
      "eval_runtime": 224.6034,
      "eval_samples_per_second": 19.038,
      "eval_steps_per_second": 9.519,
      "eval_wer": 1.012603941596281,
      "step": 370
    },
    {
      "epoch": 9.73,
      "learning_rate": 2.894736842105263e-10,
      "loss": 8.1259,
      "step": 380
    },
    {
      "epoch": 9.73,
      "eval_loss": 8.827925682067871,
      "eval_runtime": 223.0189,
      "eval_samples_per_second": 19.173,
      "eval_steps_per_second": 9.587,
      "eval_wer": 1.0123747790218032,
      "step": 380
    },
    {
      "epoch": 9.98,
      "learning_rate": 2.631578947368421e-11,
      "loss": 8.1116,
      "step": 390
    },
    {
      "epoch": 9.98,
      "eval_loss": 8.827937126159668,
      "eval_runtime": 224.7494,
      "eval_samples_per_second": 19.026,
      "eval_steps_per_second": 9.513,
      "eval_wer": 1.0123093040005238,
      "step": 390
    },
    {
      "epoch": 9.98,
      "step": 390,
      "total_flos": 1.7181016563618468e+19,
      "train_loss": 8.274780469063002,
      "train_runtime": 14935.5952,
      "train_samples_per_second": 6.813,
      "train_steps_per_second": 0.026
    }
  ],
  "max_steps": 390,
  "num_train_epochs": 10,
  "total_flos": 1.7181016563618468e+19,
  "trial_name": null,
  "trial_params": null
}