|
{ |
|
"best_metric": 0.5064709595959596, |
|
"best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/base_model_output/wav2vec2-base-sami-cont-pt-22k/outputs/shuff_100_no_idv/wr15/checkpoint-7256", |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 43536, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 953.8694458007812, |
|
"learning_rate": 5.5341173588141614e-05, |
|
"loss": 2898.1974, |
|
"step": 3628 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 0.18934940473250012, |
|
"eval_loss": 315.9604187011719, |
|
"eval_runtime": 25.3347, |
|
"eval_samples_per_second": 35.13, |
|
"eval_steps_per_second": 4.421, |
|
"eval_wer": 0.6330492424242424, |
|
"step": 3628 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 666.8331909179688, |
|
"learning_rate": 0.00011088141614602474, |
|
"loss": 757.7022, |
|
"step": 7256 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.17208417724645556, |
|
"eval_loss": 279.52197265625, |
|
"eval_runtime": 25.5129, |
|
"eval_samples_per_second": 34.884, |
|
"eval_steps_per_second": 4.39, |
|
"eval_wer": 0.5064709595959596, |
|
"step": 7256 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 236.5342559814453, |
|
"learning_rate": 0.0001664216587039079, |
|
"loss": 662.3705, |
|
"step": 10884 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.17640665909203182, |
|
"eval_loss": 301.2926330566406, |
|
"eval_runtime": 25.9701, |
|
"eval_samples_per_second": 34.27, |
|
"eval_steps_per_second": 4.313, |
|
"eval_wer": 0.525094696969697, |
|
"step": 10884 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 595.1166381835938, |
|
"learning_rate": 0.000221961901261791, |
|
"loss": 647.4321, |
|
"step": 14512 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.2026873487131354, |
|
"eval_loss": 309.8589172363281, |
|
"eval_runtime": 32.0472, |
|
"eval_samples_per_second": 27.772, |
|
"eval_steps_per_second": 3.495, |
|
"eval_wer": 0.5494002525252525, |
|
"step": 14512 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 320.42572021484375, |
|
"learning_rate": 0.00027747151782432926, |
|
"loss": 659.7851, |
|
"step": 18140 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.20700983055871167, |
|
"eval_loss": 309.31988525390625, |
|
"eval_runtime": 25.5466, |
|
"eval_samples_per_second": 34.838, |
|
"eval_steps_per_second": 4.384, |
|
"eval_wer": 0.5861742424242424, |
|
"step": 18140 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1806.6942138671875, |
|
"learning_rate": 0.00033302707337988486, |
|
"loss": 692.9787, |
|
"step": 21768 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.24119448698315468, |
|
"eval_loss": 373.3182678222656, |
|
"eval_runtime": 30.9297, |
|
"eval_samples_per_second": 28.775, |
|
"eval_steps_per_second": 3.621, |
|
"eval_wer": 0.6486742424242424, |
|
"step": 21768 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 305.56292724609375, |
|
"learning_rate": 0.00038855200294009556, |
|
"loss": 739.3389, |
|
"step": 25396 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.2555945265029887, |
|
"eval_loss": 379.1961975097656, |
|
"eval_runtime": 24.9788, |
|
"eval_samples_per_second": 35.63, |
|
"eval_steps_per_second": 4.484, |
|
"eval_wer": 0.6764520202020202, |
|
"step": 25396 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1148.437744140625, |
|
"learning_rate": 0.0004440769325003063, |
|
"loss": 786.0181, |
|
"step": 29024 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.2728597539890332, |
|
"eval_loss": 414.1454772949219, |
|
"eval_runtime": 25.8475, |
|
"eval_samples_per_second": 34.433, |
|
"eval_steps_per_second": 4.333, |
|
"eval_wer": 0.7021780303030303, |
|
"step": 29024 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 790.4903564453125, |
|
"learning_rate": 0.000499601862060517, |
|
"loss": 827.0145, |
|
"step": 32652 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.34004347181741834, |
|
"eval_loss": 441.2162170410156, |
|
"eval_runtime": 27.3805, |
|
"eval_samples_per_second": 32.505, |
|
"eval_steps_per_second": 4.09, |
|
"eval_wer": 0.7870896464646465, |
|
"step": 32652 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 779.6360473632812, |
|
"learning_rate": 0.0004902690403614588, |
|
"loss": 849.8929, |
|
"step": 36280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.2962752556439263, |
|
"eval_loss": 422.385498046875, |
|
"eval_runtime": 28.0021, |
|
"eval_samples_per_second": 31.783, |
|
"eval_steps_per_second": 4.0, |
|
"eval_wer": 0.7258522727272727, |
|
"step": 36280 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 399.57818603515625, |
|
"learning_rate": 0.0004804651187928314, |
|
"loss": 830.547, |
|
"step": 39908 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.3045744207874327, |
|
"eval_loss": 426.62811279296875, |
|
"eval_runtime": 24.2088, |
|
"eval_samples_per_second": 36.763, |
|
"eval_steps_per_second": 4.626, |
|
"eval_wer": 0.7506313131313131, |
|
"step": 39908 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1422.100341796875, |
|
"learning_rate": 0.0004706693041053246, |
|
"loss": 803.7191, |
|
"step": 43536 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.29839944672232377, |
|
"eval_loss": 418.475341796875, |
|
"eval_runtime": 26.6689, |
|
"eval_samples_per_second": 33.372, |
|
"eval_steps_per_second": 4.2, |
|
"eval_wer": 0.7313762626262627, |
|
"step": 43536 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 43536, |
|
"total_flos": 3.6380056389804335e+19, |
|
"train_loss": 929.5830404722528, |
|
"train_runtime": 31654.3209, |
|
"train_samples_per_second": 110.013, |
|
"train_steps_per_second": 6.877 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 217680, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 10 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.6380056389804335e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|