{ "best_global_step": 12000, "best_metric": 51.341187305729306, "best_model_checkpoint": "./whisper-medium-ml-exp2/checkpoint-12000", "epoch": 4.111466666666667, "eval_steps": 500, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03333333333333333, "grad_norm": 1.354724407196045, "learning_rate": 9.940000000000001e-06, "loss": 0.3119, "step": 500 }, { "epoch": 0.03333333333333333, "eval_loss": 0.4192824959754944, "eval_runtime": 3090.4729, "eval_samples_per_second": 1.902, "eval_steps_per_second": 0.06, "eval_wer": 79.09167658006831, "step": 500 }, { "epoch": 0.06666666666666667, "grad_norm": 0.926690399646759, "learning_rate": 9.657241379310346e-06, "loss": 0.0405, "step": 1000 }, { "epoch": 0.06666666666666667, "eval_loss": 0.4335130751132965, "eval_runtime": 2992.1696, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.061, "eval_wer": 73.58302313979816, "step": 1000 }, { "epoch": 0.1, "grad_norm": 0.8192572593688965, "learning_rate": 9.312413793103448e-06, "loss": 0.0355, "step": 1500 }, { "epoch": 0.1, "eval_loss": 0.43317151069641113, "eval_runtime": 3082.3837, "eval_samples_per_second": 1.907, "eval_steps_per_second": 0.06, "eval_wer": 77.27848344142139, "step": 1500 }, { "epoch": 0.13333333333333333, "grad_norm": 1.0174602270126343, "learning_rate": 8.967586206896553e-06, "loss": 0.126, "step": 2000 }, { "epoch": 0.13333333333333333, "eval_loss": 0.19669494032859802, "eval_runtime": 3003.1726, "eval_samples_per_second": 1.957, "eval_steps_per_second": 0.061, "eval_wer": 58.40208757051306, "step": 2000 }, { "epoch": 0.16666666666666666, "grad_norm": 1.0093193054199219, "learning_rate": 8.622758620689657e-06, "loss": 0.0519, "step": 2500 }, { "epoch": 0.16666666666666666, "eval_loss": 0.18607404828071594, "eval_runtime": 3062.2455, "eval_samples_per_second": 1.92, "eval_steps_per_second": 0.06, "eval_wer": 58.567097739744426, "step": 2500 }, { "epoch": 0.2, "grad_norm": 1.2591651678085327, "learning_rate": 8.27793103448276e-06, "loss": 0.0439, "step": 3000 }, { "epoch": 0.2, "eval_loss": 0.19417671859264374, "eval_runtime": 2997.3111, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.061, "eval_wer": 57.42737633830922, "step": 3000 }, { "epoch": 1.0112, "grad_norm": 1.0831571817398071, "learning_rate": 7.933103448275864e-06, "loss": 0.0534, "step": 3500 }, { "epoch": 1.0112, "eval_loss": 0.193574920296669, "eval_runtime": 2956.9818, "eval_samples_per_second": 1.988, "eval_steps_per_second": 0.062, "eval_wer": 61.14969876050501, "step": 3500 }, { "epoch": 1.0445333333333333, "grad_norm": 1.4980469942092896, "learning_rate": 7.588275862068966e-06, "loss": 0.0214, "step": 4000 }, { "epoch": 1.0445333333333333, "eval_loss": 0.22530874609947205, "eval_runtime": 3078.9727, "eval_samples_per_second": 1.909, "eval_steps_per_second": 0.06, "eval_wer": 59.78164933420316, "step": 4000 }, { "epoch": 1.0778666666666668, "grad_norm": 0.5621655583381653, "learning_rate": 7.243448275862069e-06, "loss": 0.0129, "step": 4500 }, { "epoch": 1.0778666666666668, "eval_loss": 0.26299336552619934, "eval_runtime": 2998.2811, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.061, "eval_wer": 61.061437507195215, "step": 4500 }, { "epoch": 1.1112, "grad_norm": 1.5936987400054932, "learning_rate": 6.9e-06, "loss": 0.048, "step": 5000 }, { "epoch": 1.1112, "eval_loss": 0.17795228958129883, "eval_runtime": 2987.0925, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.062, "eval_wer": 56.3605664069995, "step": 5000 }, { "epoch": 1.1445333333333334, "grad_norm": 0.48164331912994385, "learning_rate": 6.555172413793104e-06, "loss": 0.047, "step": 5500 }, { "epoch": 1.1445333333333334, "eval_loss": 0.16377924382686615, "eval_runtime": 2984.4656, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.062, "eval_wer": 52.99512644383898, "step": 5500 }, { "epoch": 1.1778666666666666, "grad_norm": 0.45277634263038635, "learning_rate": 6.2103448275862075e-06, "loss": 0.0325, "step": 6000 }, { "epoch": 1.1778666666666666, "eval_loss": 0.16828514635562897, "eval_runtime": 3034.653, "eval_samples_per_second": 1.937, "eval_steps_per_second": 0.061, "eval_wer": 54.55121071414867, "step": 6000 }, { "epoch": 1.2112, "grad_norm": 0.6970316767692566, "learning_rate": 5.865517241379311e-06, "loss": 0.0293, "step": 6500 }, { "epoch": 1.2112, "eval_loss": 0.16886456310749054, "eval_runtime": 3048.6335, "eval_samples_per_second": 1.928, "eval_steps_per_second": 0.06, "eval_wer": 57.24509766299551, "step": 6500 }, { "epoch": 2.0224, "grad_norm": 0.521206259727478, "learning_rate": 5.520689655172414e-06, "loss": 0.028, "step": 7000 }, { "epoch": 2.0224, "eval_loss": 0.21454408764839172, "eval_runtime": 2924.4034, "eval_samples_per_second": 2.01, "eval_steps_per_second": 0.063, "eval_wer": 56.52365785333282, "step": 7000 }, { "epoch": 2.0557333333333334, "grad_norm": 0.5115911364555359, "learning_rate": 5.175862068965518e-06, "loss": 0.009, "step": 7500 }, { "epoch": 2.0557333333333334, "eval_loss": 0.22271297872066498, "eval_runtime": 2915.2649, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.063, "eval_wer": 56.30684216585441, "step": 7500 }, { "epoch": 2.0890666666666666, "grad_norm": 0.5929153561592102, "learning_rate": 4.831034482758621e-06, "loss": 0.0076, "step": 8000 }, { "epoch": 2.0890666666666666, "eval_loss": 0.2749842405319214, "eval_runtime": 3041.7569, "eval_samples_per_second": 1.932, "eval_steps_per_second": 0.06, "eval_wer": 66.05395448789287, "step": 8000 }, { "epoch": 2.1224, "grad_norm": 0.3881845474243164, "learning_rate": 4.486206896551725e-06, "loss": 0.0385, "step": 8500 }, { "epoch": 2.1224, "eval_loss": 0.21780993044376373, "eval_runtime": 2981.8862, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.062, "eval_wer": 54.45143712345063, "step": 8500 }, { "epoch": 2.1557333333333335, "grad_norm": 0.688723623752594, "learning_rate": 4.141379310344828e-06, "loss": 0.0245, "step": 9000 }, { "epoch": 2.1557333333333335, "eval_loss": 0.1720988005399704, "eval_runtime": 3050.0463, "eval_samples_per_second": 1.927, "eval_steps_per_second": 0.06, "eval_wer": 52.00314670555278, "step": 9000 }, { "epoch": 2.1890666666666667, "grad_norm": 0.39908483624458313, "learning_rate": 3.7965517241379313e-06, "loss": 0.0226, "step": 9500 }, { "epoch": 2.1890666666666667, "eval_loss": 0.1741122305393219, "eval_runtime": 3209.6808, "eval_samples_per_second": 1.831, "eval_steps_per_second": 0.057, "eval_wer": 53.75110326566638, "step": 9500 }, { "epoch": 3.0002666666666666, "grad_norm": 2.697366237640381, "learning_rate": 3.4517241379310346e-06, "loss": 0.0212, "step": 10000 }, { "epoch": 3.0002666666666666, "eval_loss": 0.20012931525707245, "eval_runtime": 3160.8774, "eval_samples_per_second": 1.86, "eval_steps_per_second": 0.058, "eval_wer": 56.14950688821521, "step": 10000 }, { "epoch": 3.0336, "grad_norm": 0.3792371451854706, "learning_rate": 3.1068965517241384e-06, "loss": 0.0121, "step": 10500 }, { "epoch": 3.0336, "eval_loss": 0.23216772079467773, "eval_runtime": 3109.7166, "eval_samples_per_second": 1.89, "eval_steps_per_second": 0.059, "eval_wer": 55.472197705207414, "step": 10500 }, { "epoch": 3.0669333333333335, "grad_norm": 0.2577882409095764, "learning_rate": 2.7620689655172417e-06, "loss": 0.0042, "step": 11000 }, { "epoch": 3.0669333333333335, "eval_loss": 0.24030156433582306, "eval_runtime": 3127.3687, "eval_samples_per_second": 1.88, "eval_steps_per_second": 0.059, "eval_wer": 57.6864039295445, "step": 11000 }, { "epoch": 3.1002666666666667, "grad_norm": 0.24431835114955902, "learning_rate": 2.4179310344827587e-06, "loss": 0.0059, "step": 11500 }, { "epoch": 3.1002666666666667, "eval_loss": 0.2953338325023651, "eval_runtime": 3087.8251, "eval_samples_per_second": 1.904, "eval_steps_per_second": 0.06, "eval_wer": 64.00667715568518, "step": 11500 }, { "epoch": 3.1336, "grad_norm": 0.6591205596923828, "learning_rate": 2.073793103448276e-06, "loss": 0.0248, "step": 12000 }, { "epoch": 3.1336, "eval_loss": 0.1744297742843628, "eval_runtime": 2988.8074, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.062, "eval_wer": 51.341187305729306, "step": 12000 }, { "epoch": 3.166933333333333, "grad_norm": 0.45652803778648376, "learning_rate": 1.7289655172413794e-06, "loss": 0.0172, "step": 12500 }, { "epoch": 3.166933333333333, "eval_loss": 0.18724997341632843, "eval_runtime": 3015.7946, "eval_samples_per_second": 1.949, "eval_steps_per_second": 0.061, "eval_wer": 53.53236885528992, "step": 12500 }, { "epoch": 3.200266666666667, "grad_norm": 0.42330440878868103, "learning_rate": 1.384137931034483e-06, "loss": 0.015, "step": 13000 }, { "epoch": 3.200266666666667, "eval_loss": 0.19304682314395905, "eval_runtime": 3065.8492, "eval_samples_per_second": 1.917, "eval_steps_per_second": 0.06, "eval_wer": 54.702789823093745, "step": 13000 }, { "epoch": 4.011466666666666, "grad_norm": 0.7199889421463013, "learning_rate": 1.0393103448275863e-06, "loss": 0.0158, "step": 13500 }, { "epoch": 4.011466666666666, "eval_loss": 0.21734359860420227, "eval_runtime": 3066.4573, "eval_samples_per_second": 1.917, "eval_steps_per_second": 0.06, "eval_wer": 60.96358263939522, "step": 13500 }, { "epoch": 4.0448, "grad_norm": 0.42120951414108276, "learning_rate": 6.944827586206897e-07, "loss": 0.0028, "step": 14000 }, { "epoch": 4.0448, "eval_loss": 0.23296251893043518, "eval_runtime": 2966.4298, "eval_samples_per_second": 1.982, "eval_steps_per_second": 0.062, "eval_wer": 53.49207567443109, "step": 14000 }, { "epoch": 4.078133333333334, "grad_norm": 0.202627032995224, "learning_rate": 3.496551724137931e-07, "loss": 0.0028, "step": 14500 }, { "epoch": 4.078133333333334, "eval_loss": 0.24154414236545563, "eval_runtime": 2989.4533, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.062, "eval_wer": 53.47672589124679, "step": 14500 }, { "epoch": 4.111466666666667, "grad_norm": 2.03951358795166, "learning_rate": 4.827586206896552e-09, "loss": 0.0194, "step": 15000 }, { "epoch": 4.111466666666667, "eval_loss": 0.22199244797229767, "eval_runtime": 3069.065, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.06, "eval_wer": 57.69216009823861, "step": 15000 }, { "epoch": 4.111466666666667, "step": 15000, "total_flos": 4.898454489936691e+20, "train_loss": 0.03751766018072764, "train_runtime": 129332.0882, "train_samples_per_second": 3.711, "train_steps_per_second": 0.116 } ], "logging_steps": 500, "max_steps": 15000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.898454489936691e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }