{ "best_metric": 0.17163243889808655, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-balanced-model/checkpoint-900", "epoch": 3.314917127071823, "eval_steps": 100, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27624309392265195, "grad_norm": 4.279232025146484, "learning_rate": 0.00028799999999999995, "loss": 7.181, "step": 100 }, { "epoch": 0.27624309392265195, "eval_loss": 0.6055206656455994, "eval_runtime": 16.1135, "eval_samples_per_second": 10.488, "eval_steps_per_second": 2.669, "eval_wer": 0.5246717971933001, "step": 100 }, { "epoch": 0.5524861878453039, "grad_norm": 2.128457546234131, "learning_rate": 0.00029735130111524163, "loss": 0.5071, "step": 200 }, { "epoch": 0.5524861878453039, "eval_loss": 0.24523495137691498, "eval_runtime": 15.4101, "eval_samples_per_second": 10.967, "eval_steps_per_second": 2.79, "eval_wer": 0.3594386600271616, "step": 200 }, { "epoch": 0.8287292817679558, "grad_norm": 1.3095550537109375, "learning_rate": 0.0002945631970260223, "loss": 0.3791, "step": 300 }, { "epoch": 0.8287292817679558, "eval_loss": 0.21587276458740234, "eval_runtime": 15.3962, "eval_samples_per_second": 10.977, "eval_steps_per_second": 2.793, "eval_wer": 0.3232231779085559, "step": 300 }, { "epoch": 1.1049723756906078, "grad_norm": 4.117229461669922, "learning_rate": 0.0002917750929368029, "loss": 0.3464, "step": 400 }, { "epoch": 1.1049723756906078, "eval_loss": 0.20587413012981415, "eval_runtime": 15.3374, "eval_samples_per_second": 11.019, "eval_steps_per_second": 2.804, "eval_wer": 0.3046627433227705, "step": 400 }, { "epoch": 1.3812154696132597, "grad_norm": 1.0849329233169556, "learning_rate": 0.0002889869888475836, "loss": 0.3326, "step": 500 }, { "epoch": 1.3812154696132597, "eval_loss": 0.19188211858272552, "eval_runtime": 15.5114, "eval_samples_per_second": 10.895, "eval_steps_per_second": 2.772, "eval_wer": 0.29425079221367134, "step": 500 }, { "epoch": 1.6574585635359116, "grad_norm": 1.2119916677474976, "learning_rate": 0.00028619888475836427, "loss": 0.322, "step": 600 }, { "epoch": 1.6574585635359116, "eval_loss": 0.18680231273174286, "eval_runtime": 15.4576, "eval_samples_per_second": 10.933, "eval_steps_per_second": 2.782, "eval_wer": 0.28610230873698506, "step": 600 }, { "epoch": 1.9337016574585635, "grad_norm": 1.408340573310852, "learning_rate": 0.00028341078066914494, "loss": 0.3025, "step": 700 }, { "epoch": 1.9337016574585635, "eval_loss": 0.1849866360425949, "eval_runtime": 15.3489, "eval_samples_per_second": 11.011, "eval_steps_per_second": 2.801, "eval_wer": 0.2901765504753282, "step": 700 }, { "epoch": 2.2099447513812156, "grad_norm": 0.6426145434379578, "learning_rate": 0.0002806226765799256, "loss": 0.2939, "step": 800 }, { "epoch": 2.2099447513812156, "eval_loss": 0.17766940593719482, "eval_runtime": 15.4593, "eval_samples_per_second": 10.932, "eval_steps_per_second": 2.781, "eval_wer": 0.2698053417836125, "step": 800 }, { "epoch": 2.4861878453038675, "grad_norm": 0.5668926239013672, "learning_rate": 0.0002778345724907063, "loss": 0.2971, "step": 900 }, { "epoch": 2.4861878453038675, "eval_loss": 0.17163243889808655, "eval_runtime": 15.5806, "eval_samples_per_second": 10.847, "eval_steps_per_second": 2.76, "eval_wer": 0.2675418741511996, "step": 900 }, { "epoch": 2.7624309392265194, "grad_norm": 0.42915207147598267, "learning_rate": 0.00027504646840148696, "loss": 0.2787, "step": 1000 }, { "epoch": 2.7624309392265194, "eval_loss": 0.17503149807453156, "eval_runtime": 15.5493, "eval_samples_per_second": 10.869, "eval_steps_per_second": 2.765, "eval_wer": 0.27161611588954276, "step": 1000 }, { "epoch": 3.0386740331491713, "grad_norm": 0.994978129863739, "learning_rate": 0.00027225836431226763, "loss": 0.32, "step": 1100 }, { "epoch": 3.0386740331491713, "eval_loss": 0.1725001335144043, "eval_runtime": 15.57, "eval_samples_per_second": 10.854, "eval_steps_per_second": 2.762, "eval_wer": 0.27342688999547304, "step": 1100 }, { "epoch": 3.314917127071823, "grad_norm": 0.6422222852706909, "learning_rate": 0.0002694702602230483, "loss": 0.2738, "step": 1200 }, { "epoch": 3.314917127071823, "eval_loss": 0.18027880787849426, "eval_runtime": 15.2933, "eval_samples_per_second": 11.051, "eval_steps_per_second": 2.812, "eval_wer": 0.2544137618832051, "step": 1200 }, { "epoch": 3.314917127071823, "step": 1200, "total_flos": 4.522297158190472e+18, "train_loss": 0.9028558111190796, "train_runtime": 1060.5768, "train_samples_per_second": 40.874, "train_steps_per_second": 10.24 } ], "logging_steps": 100, "max_steps": 10860, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.522297158190472e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }