|
{ |
|
"best_global_step": 12000, |
|
"best_metric": 51.341187305729306, |
|
"best_model_checkpoint": "./whisper-medium-ml-exp2/checkpoint-12000", |
|
"epoch": 4.111466666666667, |
|
"eval_steps": 500, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03333333333333333, |
|
"grad_norm": 1.354724407196045, |
|
"learning_rate": 9.940000000000001e-06, |
|
"loss": 0.3119, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03333333333333333, |
|
"eval_loss": 0.4192824959754944, |
|
"eval_runtime": 3090.4729, |
|
"eval_samples_per_second": 1.902, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 79.09167658006831, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"grad_norm": 0.926690399646759, |
|
"learning_rate": 9.657241379310346e-06, |
|
"loss": 0.0405, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06666666666666667, |
|
"eval_loss": 0.4335130751132965, |
|
"eval_runtime": 2992.1696, |
|
"eval_samples_per_second": 1.964, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 73.58302313979816, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.8192572593688965, |
|
"learning_rate": 9.312413793103448e-06, |
|
"loss": 0.0355, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 0.43317151069641113, |
|
"eval_runtime": 3082.3837, |
|
"eval_samples_per_second": 1.907, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 77.27848344142139, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"grad_norm": 1.0174602270126343, |
|
"learning_rate": 8.967586206896553e-06, |
|
"loss": 0.126, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13333333333333333, |
|
"eval_loss": 0.19669494032859802, |
|
"eval_runtime": 3003.1726, |
|
"eval_samples_per_second": 1.957, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 58.40208757051306, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 1.0093193054199219, |
|
"learning_rate": 8.622758620689657e-06, |
|
"loss": 0.0519, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"eval_loss": 0.18607404828071594, |
|
"eval_runtime": 3062.2455, |
|
"eval_samples_per_second": 1.92, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 58.567097739744426, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.2591651678085327, |
|
"learning_rate": 8.27793103448276e-06, |
|
"loss": 0.0439, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.19417671859264374, |
|
"eval_runtime": 2997.3111, |
|
"eval_samples_per_second": 1.961, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 57.42737633830922, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0112, |
|
"grad_norm": 1.0831571817398071, |
|
"learning_rate": 7.933103448275864e-06, |
|
"loss": 0.0534, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0112, |
|
"eval_loss": 0.193574920296669, |
|
"eval_runtime": 2956.9818, |
|
"eval_samples_per_second": 1.988, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 61.14969876050501, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.0445333333333333, |
|
"grad_norm": 1.4980469942092896, |
|
"learning_rate": 7.588275862068966e-06, |
|
"loss": 0.0214, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0445333333333333, |
|
"eval_loss": 0.22530874609947205, |
|
"eval_runtime": 3078.9727, |
|
"eval_samples_per_second": 1.909, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 59.78164933420316, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.0778666666666668, |
|
"grad_norm": 0.5621655583381653, |
|
"learning_rate": 7.243448275862069e-06, |
|
"loss": 0.0129, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0778666666666668, |
|
"eval_loss": 0.26299336552619934, |
|
"eval_runtime": 2998.2811, |
|
"eval_samples_per_second": 1.96, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 61.061437507195215, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.1112, |
|
"grad_norm": 1.5936987400054932, |
|
"learning_rate": 6.9e-06, |
|
"loss": 0.048, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1112, |
|
"eval_loss": 0.17795228958129883, |
|
"eval_runtime": 2987.0925, |
|
"eval_samples_per_second": 1.968, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 56.3605664069995, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1445333333333334, |
|
"grad_norm": 0.48164331912994385, |
|
"learning_rate": 6.555172413793104e-06, |
|
"loss": 0.047, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.1445333333333334, |
|
"eval_loss": 0.16377924382686615, |
|
"eval_runtime": 2984.4656, |
|
"eval_samples_per_second": 1.97, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 52.99512644383898, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.1778666666666666, |
|
"grad_norm": 0.45277634263038635, |
|
"learning_rate": 6.2103448275862075e-06, |
|
"loss": 0.0325, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1778666666666666, |
|
"eval_loss": 0.16828514635562897, |
|
"eval_runtime": 3034.653, |
|
"eval_samples_per_second": 1.937, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 54.55121071414867, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.2112, |
|
"grad_norm": 0.6970316767692566, |
|
"learning_rate": 5.865517241379311e-06, |
|
"loss": 0.0293, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2112, |
|
"eval_loss": 0.16886456310749054, |
|
"eval_runtime": 3048.6335, |
|
"eval_samples_per_second": 1.928, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 57.24509766299551, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.0224, |
|
"grad_norm": 0.521206259727478, |
|
"learning_rate": 5.520689655172414e-06, |
|
"loss": 0.028, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.0224, |
|
"eval_loss": 0.21454408764839172, |
|
"eval_runtime": 2924.4034, |
|
"eval_samples_per_second": 2.01, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 56.52365785333282, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.0557333333333334, |
|
"grad_norm": 0.5115911364555359, |
|
"learning_rate": 5.175862068965518e-06, |
|
"loss": 0.009, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.0557333333333334, |
|
"eval_loss": 0.22271297872066498, |
|
"eval_runtime": 2915.2649, |
|
"eval_samples_per_second": 2.016, |
|
"eval_steps_per_second": 0.063, |
|
"eval_wer": 56.30684216585441, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.0890666666666666, |
|
"grad_norm": 0.5929153561592102, |
|
"learning_rate": 4.831034482758621e-06, |
|
"loss": 0.0076, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.0890666666666666, |
|
"eval_loss": 0.2749842405319214, |
|
"eval_runtime": 3041.7569, |
|
"eval_samples_per_second": 1.932, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 66.05395448789287, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.1224, |
|
"grad_norm": 0.3881845474243164, |
|
"learning_rate": 4.486206896551725e-06, |
|
"loss": 0.0385, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.1224, |
|
"eval_loss": 0.21780993044376373, |
|
"eval_runtime": 2981.8862, |
|
"eval_samples_per_second": 1.971, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 54.45143712345063, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.1557333333333335, |
|
"grad_norm": 0.688723623752594, |
|
"learning_rate": 4.141379310344828e-06, |
|
"loss": 0.0245, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.1557333333333335, |
|
"eval_loss": 0.1720988005399704, |
|
"eval_runtime": 3050.0463, |
|
"eval_samples_per_second": 1.927, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 52.00314670555278, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.1890666666666667, |
|
"grad_norm": 0.39908483624458313, |
|
"learning_rate": 3.7965517241379313e-06, |
|
"loss": 0.0226, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.1890666666666667, |
|
"eval_loss": 0.1741122305393219, |
|
"eval_runtime": 3209.6808, |
|
"eval_samples_per_second": 1.831, |
|
"eval_steps_per_second": 0.057, |
|
"eval_wer": 53.75110326566638, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.0002666666666666, |
|
"grad_norm": 2.697366237640381, |
|
"learning_rate": 3.4517241379310346e-06, |
|
"loss": 0.0212, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.0002666666666666, |
|
"eval_loss": 0.20012931525707245, |
|
"eval_runtime": 3160.8774, |
|
"eval_samples_per_second": 1.86, |
|
"eval_steps_per_second": 0.058, |
|
"eval_wer": 56.14950688821521, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.0336, |
|
"grad_norm": 0.3792371451854706, |
|
"learning_rate": 3.1068965517241384e-06, |
|
"loss": 0.0121, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.0336, |
|
"eval_loss": 0.23216772079467773, |
|
"eval_runtime": 3109.7166, |
|
"eval_samples_per_second": 1.89, |
|
"eval_steps_per_second": 0.059, |
|
"eval_wer": 55.472197705207414, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.0669333333333335, |
|
"grad_norm": 0.2577882409095764, |
|
"learning_rate": 2.7620689655172417e-06, |
|
"loss": 0.0042, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.0669333333333335, |
|
"eval_loss": 0.24030156433582306, |
|
"eval_runtime": 3127.3687, |
|
"eval_samples_per_second": 1.88, |
|
"eval_steps_per_second": 0.059, |
|
"eval_wer": 57.6864039295445, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.1002666666666667, |
|
"grad_norm": 0.24431835114955902, |
|
"learning_rate": 2.4179310344827587e-06, |
|
"loss": 0.0059, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.1002666666666667, |
|
"eval_loss": 0.2953338325023651, |
|
"eval_runtime": 3087.8251, |
|
"eval_samples_per_second": 1.904, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 64.00667715568518, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.1336, |
|
"grad_norm": 0.6591205596923828, |
|
"learning_rate": 2.073793103448276e-06, |
|
"loss": 0.0248, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.1336, |
|
"eval_loss": 0.1744297742843628, |
|
"eval_runtime": 2988.8074, |
|
"eval_samples_per_second": 1.967, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 51.341187305729306, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.166933333333333, |
|
"grad_norm": 0.45652803778648376, |
|
"learning_rate": 1.7289655172413794e-06, |
|
"loss": 0.0172, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.166933333333333, |
|
"eval_loss": 0.18724997341632843, |
|
"eval_runtime": 3015.7946, |
|
"eval_samples_per_second": 1.949, |
|
"eval_steps_per_second": 0.061, |
|
"eval_wer": 53.53236885528992, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.200266666666667, |
|
"grad_norm": 0.42330440878868103, |
|
"learning_rate": 1.384137931034483e-06, |
|
"loss": 0.015, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.200266666666667, |
|
"eval_loss": 0.19304682314395905, |
|
"eval_runtime": 3065.8492, |
|
"eval_samples_per_second": 1.917, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 54.702789823093745, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.011466666666666, |
|
"grad_norm": 0.7199889421463013, |
|
"learning_rate": 1.0393103448275863e-06, |
|
"loss": 0.0158, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.011466666666666, |
|
"eval_loss": 0.21734359860420227, |
|
"eval_runtime": 3066.4573, |
|
"eval_samples_per_second": 1.917, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 60.96358263939522, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.0448, |
|
"grad_norm": 0.42120951414108276, |
|
"learning_rate": 6.944827586206897e-07, |
|
"loss": 0.0028, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.0448, |
|
"eval_loss": 0.23296251893043518, |
|
"eval_runtime": 2966.4298, |
|
"eval_samples_per_second": 1.982, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 53.49207567443109, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.078133333333334, |
|
"grad_norm": 0.202627032995224, |
|
"learning_rate": 3.496551724137931e-07, |
|
"loss": 0.0028, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.078133333333334, |
|
"eval_loss": 0.24154414236545563, |
|
"eval_runtime": 2989.4533, |
|
"eval_samples_per_second": 1.966, |
|
"eval_steps_per_second": 0.062, |
|
"eval_wer": 53.47672589124679, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.111466666666667, |
|
"grad_norm": 2.03951358795166, |
|
"learning_rate": 4.827586206896552e-09, |
|
"loss": 0.0194, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.111466666666667, |
|
"eval_loss": 0.22199244797229767, |
|
"eval_runtime": 3069.065, |
|
"eval_samples_per_second": 1.915, |
|
"eval_steps_per_second": 0.06, |
|
"eval_wer": 57.69216009823861, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.111466666666667, |
|
"step": 15000, |
|
"total_flos": 4.898454489936691e+20, |
|
"train_loss": 0.03751766018072764, |
|
"train_runtime": 129332.0882, |
|
"train_samples_per_second": 3.711, |
|
"train_steps_per_second": 0.116 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.898454489936691e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|