|
{ |
|
"best_metric": 28.417188512200386, |
|
"best_model_checkpoint": "./v3-turbo-low-lora-8805-qkvo/v3-turbo-low-lora-8805-qkvo/checkpoint-3500", |
|
"epoch": 10.482180293501049, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0482180293501049, |
|
"grad_norm": 1.8798627853393555, |
|
"learning_rate": 9.760113134728383e-06, |
|
"loss": 1.1764, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0482180293501049, |
|
"eval_loss": 0.42681750655174255, |
|
"eval_runtime": 331.0832, |
|
"eval_samples_per_second": 2.471, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 32.19606996329087, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0964360587002098, |
|
"grad_norm": 1.6974595785140991, |
|
"learning_rate": 9.054297904161868e-06, |
|
"loss": 0.3681, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0964360587002098, |
|
"eval_loss": 0.39048299193382263, |
|
"eval_runtime": 331.8097, |
|
"eval_samples_per_second": 2.465, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 31.48348088965666, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.1446540880503147, |
|
"grad_norm": 2.770571231842041, |
|
"learning_rate": 7.951619746781474e-06, |
|
"loss": 0.3222, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.1446540880503147, |
|
"eval_loss": 0.3774045407772064, |
|
"eval_runtime": 332.0608, |
|
"eval_samples_per_second": 2.463, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 30.598142949686892, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.1928721174004195, |
|
"grad_norm": 2.8018639087677, |
|
"learning_rate": 6.5600164834420754e-06, |
|
"loss": 0.3158, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.1928721174004195, |
|
"eval_loss": 0.36975201964378357, |
|
"eval_runtime": 330.6434, |
|
"eval_samples_per_second": 2.474, |
|
"eval_steps_per_second": 0.312, |
|
"eval_wer": 29.23774562729432, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.241090146750524, |
|
"grad_norm": 4.796682357788086, |
|
"learning_rate": 5.015707937429398e-06, |
|
"loss": 0.2983, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.241090146750524, |
|
"eval_loss": 0.36699235439300537, |
|
"eval_runtime": 332.0572, |
|
"eval_samples_per_second": 2.463, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 28.978622327790976, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.289308176100629, |
|
"grad_norm": 2.243896007537842, |
|
"learning_rate": 3.4698617890574972e-06, |
|
"loss": 0.283, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.289308176100629, |
|
"eval_loss": 0.3649148643016815, |
|
"eval_runtime": 332.7836, |
|
"eval_samples_per_second": 2.458, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 28.805873461455413, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.337526205450734, |
|
"grad_norm": 2.2685556411743164, |
|
"learning_rate": 2.0737962298724513e-06, |
|
"loss": 0.2714, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.337526205450734, |
|
"eval_loss": 0.36280229687690735, |
|
"eval_runtime": 331.6361, |
|
"eval_samples_per_second": 2.467, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 28.417188512200386, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.385744234800839, |
|
"grad_norm": 1.9245986938476562, |
|
"learning_rate": 9.641678837679985e-07, |
|
"loss": 0.263, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.385744234800839, |
|
"eval_loss": 0.36285853385925293, |
|
"eval_runtime": 332.0272, |
|
"eval_samples_per_second": 2.464, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 28.546750161952062, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.433962264150944, |
|
"grad_norm": 3.1871135234832764, |
|
"learning_rate": 2.49594904496141e-07, |
|
"loss": 0.2656, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.433962264150944, |
|
"eval_loss": 0.36288538575172424, |
|
"eval_runtime": 332.2149, |
|
"eval_samples_per_second": 2.462, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 28.762686244871517, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.482180293501049, |
|
"grad_norm": 1.539926528930664, |
|
"learning_rate": 3.55305337634837e-11, |
|
"loss": 0.2607, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.482180293501049, |
|
"eval_loss": 0.36288636922836304, |
|
"eval_runtime": 331.4383, |
|
"eval_samples_per_second": 2.468, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 28.762686244871517, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.482180293501049, |
|
"step": 5000, |
|
"total_flos": 6.84657859756032e+19, |
|
"train_loss": 0.38244903564453125, |
|
"train_runtime": 13834.795, |
|
"train_samples_per_second": 2.891, |
|
"train_steps_per_second": 0.361 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 11, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.84657859756032e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|