|
{ |
|
"best_metric": 0.17163243889808655, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-balanced-model/checkpoint-900", |
|
"epoch": 3.314917127071823, |
|
"eval_steps": 100, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27624309392265195, |
|
"grad_norm": 4.279232025146484, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 7.181, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27624309392265195, |
|
"eval_loss": 0.6055206656455994, |
|
"eval_runtime": 16.1135, |
|
"eval_samples_per_second": 10.488, |
|
"eval_steps_per_second": 2.669, |
|
"eval_wer": 0.5246717971933001, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5524861878453039, |
|
"grad_norm": 2.128457546234131, |
|
"learning_rate": 0.00029735130111524163, |
|
"loss": 0.5071, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5524861878453039, |
|
"eval_loss": 0.24523495137691498, |
|
"eval_runtime": 15.4101, |
|
"eval_samples_per_second": 10.967, |
|
"eval_steps_per_second": 2.79, |
|
"eval_wer": 0.3594386600271616, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8287292817679558, |
|
"grad_norm": 1.3095550537109375, |
|
"learning_rate": 0.0002945631970260223, |
|
"loss": 0.3791, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8287292817679558, |
|
"eval_loss": 0.21587276458740234, |
|
"eval_runtime": 15.3962, |
|
"eval_samples_per_second": 10.977, |
|
"eval_steps_per_second": 2.793, |
|
"eval_wer": 0.3232231779085559, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.1049723756906078, |
|
"grad_norm": 4.117229461669922, |
|
"learning_rate": 0.0002917750929368029, |
|
"loss": 0.3464, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1049723756906078, |
|
"eval_loss": 0.20587413012981415, |
|
"eval_runtime": 15.3374, |
|
"eval_samples_per_second": 11.019, |
|
"eval_steps_per_second": 2.804, |
|
"eval_wer": 0.3046627433227705, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3812154696132597, |
|
"grad_norm": 1.0849329233169556, |
|
"learning_rate": 0.0002889869888475836, |
|
"loss": 0.3326, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3812154696132597, |
|
"eval_loss": 0.19188211858272552, |
|
"eval_runtime": 15.5114, |
|
"eval_samples_per_second": 10.895, |
|
"eval_steps_per_second": 2.772, |
|
"eval_wer": 0.29425079221367134, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6574585635359116, |
|
"grad_norm": 1.2119916677474976, |
|
"learning_rate": 0.00028619888475836427, |
|
"loss": 0.322, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6574585635359116, |
|
"eval_loss": 0.18680231273174286, |
|
"eval_runtime": 15.4576, |
|
"eval_samples_per_second": 10.933, |
|
"eval_steps_per_second": 2.782, |
|
"eval_wer": 0.28610230873698506, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9337016574585635, |
|
"grad_norm": 1.408340573310852, |
|
"learning_rate": 0.00028341078066914494, |
|
"loss": 0.3025, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9337016574585635, |
|
"eval_loss": 0.1849866360425949, |
|
"eval_runtime": 15.3489, |
|
"eval_samples_per_second": 11.011, |
|
"eval_steps_per_second": 2.801, |
|
"eval_wer": 0.2901765504753282, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.2099447513812156, |
|
"grad_norm": 0.6426145434379578, |
|
"learning_rate": 0.0002806226765799256, |
|
"loss": 0.2939, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.2099447513812156, |
|
"eval_loss": 0.17766940593719482, |
|
"eval_runtime": 15.4593, |
|
"eval_samples_per_second": 10.932, |
|
"eval_steps_per_second": 2.781, |
|
"eval_wer": 0.2698053417836125, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4861878453038675, |
|
"grad_norm": 0.5668926239013672, |
|
"learning_rate": 0.0002778345724907063, |
|
"loss": 0.2971, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4861878453038675, |
|
"eval_loss": 0.17163243889808655, |
|
"eval_runtime": 15.5806, |
|
"eval_samples_per_second": 10.847, |
|
"eval_steps_per_second": 2.76, |
|
"eval_wer": 0.2675418741511996, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.7624309392265194, |
|
"grad_norm": 0.42915207147598267, |
|
"learning_rate": 0.00027504646840148696, |
|
"loss": 0.2787, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7624309392265194, |
|
"eval_loss": 0.17503149807453156, |
|
"eval_runtime": 15.5493, |
|
"eval_samples_per_second": 10.869, |
|
"eval_steps_per_second": 2.765, |
|
"eval_wer": 0.27161611588954276, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0386740331491713, |
|
"grad_norm": 0.994978129863739, |
|
"learning_rate": 0.00027225836431226763, |
|
"loss": 0.32, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.0386740331491713, |
|
"eval_loss": 0.1725001335144043, |
|
"eval_runtime": 15.57, |
|
"eval_samples_per_second": 10.854, |
|
"eval_steps_per_second": 2.762, |
|
"eval_wer": 0.27342688999547304, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.314917127071823, |
|
"grad_norm": 0.6422222852706909, |
|
"learning_rate": 0.0002694702602230483, |
|
"loss": 0.2738, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.314917127071823, |
|
"eval_loss": 0.18027880787849426, |
|
"eval_runtime": 15.2933, |
|
"eval_samples_per_second": 11.051, |
|
"eval_steps_per_second": 2.812, |
|
"eval_wer": 0.2544137618832051, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.314917127071823, |
|
"step": 1200, |
|
"total_flos": 4.522297158190472e+18, |
|
"train_loss": 0.9028558111190796, |
|
"train_runtime": 1060.5768, |
|
"train_samples_per_second": 40.874, |
|
"train_steps_per_second": 10.24 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 10860, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.522297158190472e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|