|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/base_model_output/base-sami-cont-pt-22k/mixlabels/rerun/ep20/outputs/checkpoint-1525", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 30500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 599.1070556640625, |
|
"learning_rate": 9.908196721311476e-05, |
|
"loss": 5425.9882, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1144.5670166015625, |
|
"eval_runtime": 24.5176, |
|
"eval_samples_per_second": 36.3, |
|
"eval_steps_per_second": 4.568, |
|
"eval_wer": 1.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 729.23193359375, |
|
"learning_rate": 0.00019908196721311476, |
|
"loss": 4416.5207, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1142.759521484375, |
|
"eval_runtime": 26.3683, |
|
"eval_samples_per_second": 33.753, |
|
"eval_steps_per_second": 4.248, |
|
"eval_wer": 1.0, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 611.4544677734375, |
|
"learning_rate": 0.00029908196721311475, |
|
"loss": 4474.501, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1144.2608642578125, |
|
"eval_runtime": 25.6743, |
|
"eval_samples_per_second": 34.665, |
|
"eval_steps_per_second": 4.362, |
|
"eval_wer": 1.0, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 435.5348815917969, |
|
"learning_rate": 0.00039901639344262297, |
|
"loss": 4499.2403, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1163.198486328125, |
|
"eval_runtime": 24.0072, |
|
"eval_samples_per_second": 37.072, |
|
"eval_steps_per_second": 4.665, |
|
"eval_wer": 1.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 500.39129638671875, |
|
"learning_rate": 0.0004989508196721312, |
|
"loss": 4492.3184, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1145.3702392578125, |
|
"eval_runtime": 23.8651, |
|
"eval_samples_per_second": 37.293, |
|
"eval_steps_per_second": 4.693, |
|
"eval_wer": 1.0, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 480.6201171875, |
|
"learning_rate": 0.000467016393442623, |
|
"loss": 4439.5584, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1145.4093017578125, |
|
"eval_runtime": 26.4383, |
|
"eval_samples_per_second": 33.663, |
|
"eval_steps_per_second": 4.236, |
|
"eval_wer": 1.0, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 496.0490417480469, |
|
"learning_rate": 0.00043370491803278687, |
|
"loss": 4400.9305, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1144.0640869140625, |
|
"eval_runtime": 25.1926, |
|
"eval_samples_per_second": 35.328, |
|
"eval_steps_per_second": 4.446, |
|
"eval_wer": 1.0, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 500.4902038574219, |
|
"learning_rate": 0.0004003715846994536, |
|
"loss": 4388.3134, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1142.6368408203125, |
|
"eval_runtime": 25.9206, |
|
"eval_samples_per_second": 34.336, |
|
"eval_steps_per_second": 4.321, |
|
"eval_wer": 1.0, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 519.6370239257812, |
|
"learning_rate": 0.00036706010928961745, |
|
"loss": 4337.4364, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1141.1510009765625, |
|
"eval_runtime": 25.2158, |
|
"eval_samples_per_second": 35.295, |
|
"eval_steps_per_second": 4.442, |
|
"eval_wer": 1.0, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 622.8638305664062, |
|
"learning_rate": 0.00033372677595628415, |
|
"loss": 4328.8672, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1140.218017578125, |
|
"eval_runtime": 24.9601, |
|
"eval_samples_per_second": 35.657, |
|
"eval_steps_per_second": 4.487, |
|
"eval_wer": 1.0, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 536.9533081054688, |
|
"learning_rate": 0.00030041530054644814, |
|
"loss": 4315.4475, |
|
"step": 16775 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1140.9918212890625, |
|
"eval_runtime": 24.0596, |
|
"eval_samples_per_second": 36.991, |
|
"eval_steps_per_second": 4.655, |
|
"eval_wer": 1.0, |
|
"step": 16775 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 479.0085754394531, |
|
"learning_rate": 0.0002671256830601093, |
|
"loss": 4340.9885, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1141.5726318359375, |
|
"eval_runtime": 24.2656, |
|
"eval_samples_per_second": 36.677, |
|
"eval_steps_per_second": 4.616, |
|
"eval_wer": 1.0, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 435.52410888671875, |
|
"learning_rate": 0.00023379234972677597, |
|
"loss": 4303.5944, |
|
"step": 19825 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1140.975830078125, |
|
"eval_runtime": 25.0082, |
|
"eval_samples_per_second": 35.588, |
|
"eval_steps_per_second": 4.479, |
|
"eval_wer": 1.0, |
|
"step": 19825 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 576.5276489257812, |
|
"learning_rate": 0.00020045901639344265, |
|
"loss": 4305.4659, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1141.2747802734375, |
|
"eval_runtime": 24.5459, |
|
"eval_samples_per_second": 36.259, |
|
"eval_steps_per_second": 4.563, |
|
"eval_wer": 1.0, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 485.7404479980469, |
|
"learning_rate": 0.00016714754098360658, |
|
"loss": 4296.8341, |
|
"step": 22875 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1140.5506591796875, |
|
"eval_runtime": 27.5372, |
|
"eval_samples_per_second": 32.32, |
|
"eval_steps_per_second": 4.067, |
|
"eval_wer": 1.0, |
|
"step": 22875 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 455.784423828125, |
|
"learning_rate": 0.00013381420765027323, |
|
"loss": 4293.78, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1141.3170166015625, |
|
"eval_runtime": 23.8888, |
|
"eval_samples_per_second": 37.256, |
|
"eval_steps_per_second": 4.688, |
|
"eval_wer": 1.0, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 530.2776489257812, |
|
"learning_rate": 0.00010050273224043716, |
|
"loss": 4291.8813, |
|
"step": 25925 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1140.3837890625, |
|
"eval_runtime": 25.3819, |
|
"eval_samples_per_second": 35.064, |
|
"eval_steps_per_second": 4.413, |
|
"eval_wer": 1.0, |
|
"step": 25925 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 510.512451171875, |
|
"learning_rate": 6.716939890710383e-05, |
|
"loss": 4291.3587, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1141.1961669921875, |
|
"eval_runtime": 24.3571, |
|
"eval_samples_per_second": 36.54, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 1.0, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 453.8408508300781, |
|
"learning_rate": 3.383606557377049e-05, |
|
"loss": 4288.1016, |
|
"step": 28975 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1140.9141845703125, |
|
"eval_runtime": 23.8605, |
|
"eval_samples_per_second": 37.3, |
|
"eval_steps_per_second": 4.694, |
|
"eval_wer": 1.0, |
|
"step": 28975 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 636.82763671875, |
|
"learning_rate": 5.245901639344263e-07, |
|
"loss": 4287.0193, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 1141.5001220703125, |
|
"eval_runtime": 26.8787, |
|
"eval_samples_per_second": 33.112, |
|
"eval_steps_per_second": 4.167, |
|
"eval_wer": 1.0, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 30500, |
|
"total_flos": 3.1981440469898e+19, |
|
"train_loss": 4410.907295081967, |
|
"train_runtime": 25843.4041, |
|
"train_samples_per_second": 18.879, |
|
"train_steps_per_second": 1.18 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 30500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.1981440469898e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|