|
{ |
|
"best_metric": 0.9392894506454468, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-toigen-male-model/checkpoint-400", |
|
"epoch": 9.437054631828978, |
|
"eval_steps": 200, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2375296912114014, |
|
"grad_norm": 150.66383361816406, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 14.4659, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.4750593824228028, |
|
"grad_norm": 96.092529296875, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 11.6879, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7125890736342043, |
|
"grad_norm": 94.01177215576172, |
|
"learning_rate": 1.3800000000000001e-06, |
|
"loss": 9.6203, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9501187648456056, |
|
"grad_norm": 88.87047576904297, |
|
"learning_rate": 1.8800000000000002e-06, |
|
"loss": 7.6311, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.180522565320665, |
|
"grad_norm": 77.90989685058594, |
|
"learning_rate": 2.38e-06, |
|
"loss": 6.1323, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.4180522565320666, |
|
"grad_norm": 63.12504959106445, |
|
"learning_rate": 2.88e-06, |
|
"loss": 5.5927, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.655581947743468, |
|
"grad_norm": 63.86695861816406, |
|
"learning_rate": 3.3800000000000007e-06, |
|
"loss": 5.0682, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.8931116389548692, |
|
"grad_norm": 75.34732055664062, |
|
"learning_rate": 3.88e-06, |
|
"loss": 4.343, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.8931116389548692, |
|
"eval_loss": 1.066441535949707, |
|
"eval_runtime": 125.6362, |
|
"eval_samples_per_second": 1.679, |
|
"eval_steps_per_second": 0.844, |
|
"eval_wer": 0.606701030927835, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.1235154394299287, |
|
"grad_norm": 74.82162475585938, |
|
"learning_rate": 4.38e-06, |
|
"loss": 3.9738, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.36104513064133, |
|
"grad_norm": 60.270687103271484, |
|
"learning_rate": 4.880000000000001e-06, |
|
"loss": 3.4967, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.598574821852732, |
|
"grad_norm": 72.7863998413086, |
|
"learning_rate": 5.380000000000001e-06, |
|
"loss": 3.4653, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.836104513064133, |
|
"grad_norm": 42.029544830322266, |
|
"learning_rate": 5.8800000000000005e-06, |
|
"loss": 3.0671, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0665083135391926, |
|
"grad_norm": 58.11145782470703, |
|
"learning_rate": 6.380000000000001e-06, |
|
"loss": 2.7671, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 3.304038004750594, |
|
"grad_norm": 72.86565399169922, |
|
"learning_rate": 6.88e-06, |
|
"loss": 2.1218, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.5415676959619953, |
|
"grad_norm": 29.798490524291992, |
|
"learning_rate": 7.3800000000000005e-06, |
|
"loss": 1.8895, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.7790973871733966, |
|
"grad_norm": 62.0257453918457, |
|
"learning_rate": 7.88e-06, |
|
"loss": 1.9496, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.7790973871733966, |
|
"eval_loss": 0.9392894506454468, |
|
"eval_runtime": 129.8196, |
|
"eval_samples_per_second": 1.625, |
|
"eval_steps_per_second": 0.817, |
|
"eval_wer": 0.5695876288659794, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.009501187648456, |
|
"grad_norm": 36.066246032714844, |
|
"learning_rate": 8.380000000000001e-06, |
|
"loss": 1.9781, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 4.247030878859857, |
|
"grad_norm": 42.63113021850586, |
|
"learning_rate": 8.880000000000001e-06, |
|
"loss": 1.0441, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.484560570071259, |
|
"grad_norm": 33.34630584716797, |
|
"learning_rate": 9.38e-06, |
|
"loss": 1.1374, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 4.72209026128266, |
|
"grad_norm": 61.29568862915039, |
|
"learning_rate": 9.88e-06, |
|
"loss": 1.0169, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.959619952494061, |
|
"grad_norm": 49.31779861450195, |
|
"learning_rate": 9.957777777777779e-06, |
|
"loss": 1.3693, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 5.190023752969121, |
|
"grad_norm": 35.42589569091797, |
|
"learning_rate": 9.902222222222223e-06, |
|
"loss": 0.7001, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.427553444180522, |
|
"grad_norm": 22.584501266479492, |
|
"learning_rate": 9.846666666666668e-06, |
|
"loss": 0.5772, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 5.665083135391924, |
|
"grad_norm": 21.677404403686523, |
|
"learning_rate": 9.791111111111112e-06, |
|
"loss": 0.702, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.665083135391924, |
|
"eval_loss": 0.9410276412963867, |
|
"eval_runtime": 124.8464, |
|
"eval_samples_per_second": 1.69, |
|
"eval_steps_per_second": 0.849, |
|
"eval_wer": 0.4814432989690722, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.902612826603326, |
|
"grad_norm": 27.595264434814453, |
|
"learning_rate": 9.735555555555556e-06, |
|
"loss": 0.6341, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 6.133016627078385, |
|
"grad_norm": 15.151907920837402, |
|
"learning_rate": 9.68e-06, |
|
"loss": 0.39, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.370546318289787, |
|
"grad_norm": 63.603759765625, |
|
"learning_rate": 9.624444444444445e-06, |
|
"loss": 0.3234, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 6.608076009501188, |
|
"grad_norm": 33.50586700439453, |
|
"learning_rate": 9.56888888888889e-06, |
|
"loss": 0.2813, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.845605700712589, |
|
"grad_norm": 29.78474235534668, |
|
"learning_rate": 9.513333333333334e-06, |
|
"loss": 0.3366, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 7.076009501187649, |
|
"grad_norm": 13.300681114196777, |
|
"learning_rate": 9.457777777777778e-06, |
|
"loss": 0.2659, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.31353919239905, |
|
"grad_norm": 26.387012481689453, |
|
"learning_rate": 9.402222222222222e-06, |
|
"loss": 0.1854, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 7.551068883610451, |
|
"grad_norm": 20.7415771484375, |
|
"learning_rate": 9.346666666666666e-06, |
|
"loss": 0.2108, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.551068883610451, |
|
"eval_loss": 0.9733582735061646, |
|
"eval_runtime": 125.0244, |
|
"eval_samples_per_second": 1.688, |
|
"eval_steps_per_second": 0.848, |
|
"eval_wer": 0.4551546391752577, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.788598574821853, |
|
"grad_norm": 16.81197166442871, |
|
"learning_rate": 9.291111111111112e-06, |
|
"loss": 0.211, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 8.019002375296912, |
|
"grad_norm": 8.114081382751465, |
|
"learning_rate": 9.235555555555556e-06, |
|
"loss": 0.2144, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.256532066508314, |
|
"grad_norm": 7.230250835418701, |
|
"learning_rate": 9.180000000000002e-06, |
|
"loss": 0.1472, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 8.494061757719715, |
|
"grad_norm": 34.53820037841797, |
|
"learning_rate": 9.124444444444444e-06, |
|
"loss": 0.1506, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.731591448931116, |
|
"grad_norm": 20.564693450927734, |
|
"learning_rate": 9.06888888888889e-06, |
|
"loss": 0.1414, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 8.969121140142517, |
|
"grad_norm": 24.549924850463867, |
|
"learning_rate": 9.013333333333334e-06, |
|
"loss": 0.1398, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 9.199524940617577, |
|
"grad_norm": 9.88586139678955, |
|
"learning_rate": 8.957777777777778e-06, |
|
"loss": 0.0744, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 9.437054631828978, |
|
"grad_norm": 7.651462554931641, |
|
"learning_rate": 8.902222222222224e-06, |
|
"loss": 0.1073, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.437054631828978, |
|
"eval_loss": 1.0236197710037231, |
|
"eval_runtime": 125.7559, |
|
"eval_samples_per_second": 1.678, |
|
"eval_steps_per_second": 0.843, |
|
"eval_wer": 0.44484536082474224, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.437054631828978, |
|
"step": 1000, |
|
"total_flos": 8.10972659515392e+18, |
|
"train_loss": 2.490273738861084, |
|
"train_runtime": 2581.6643, |
|
"train_samples_per_second": 15.494, |
|
"train_steps_per_second": 1.937 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 48, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.10972659515392e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|