|
{
  "best_global_step": 1900,
  "best_metric": 0.21757769584655762,
  "best_model_checkpoint": "/scratch/skscla001/experiments/datasets/results/mms-1b-all-bemgen-combined-fusion/checkpoint-1600",
  "epoch": 5.0,
  "eval_steps": 100,
  "global_step": 1970,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 29.435340881347656,
      "learning_rate": 0.00029099999999999997,
      "loss": 7.7297,
      "step": 100
    },
    {
      "epoch": 0.25380710659898476,
      "eval_loss": 5.600469589233398,
      "eval_runtime": 68.6711,
      "eval_samples_per_second": 24.406,
      "eval_steps_per_second": 6.102,
      "eval_wer": 0.9995995194233079,
      "step": 100
    },
    {
      "epoch": 0.5076142131979695,
      "grad_norm": 12.57145881652832,
      "learning_rate": 0.0002845989304812834,
      "loss": 4.8876,
      "step": 200
    },
    {
      "epoch": 0.5076142131979695,
      "eval_loss": 5.189175605773926,
      "eval_runtime": 69.1166,
      "eval_samples_per_second": 24.249,
      "eval_steps_per_second": 6.062,
      "eval_wer": 1.1228140435188894,
      "step": 200
    },
    {
      "epoch": 0.7614213197969543,
      "grad_norm": 5.573763847351074,
      "learning_rate": 0.00026855614973262027,
      "loss": 4.4438,
      "step": 300
    },
    {
      "epoch": 0.7614213197969543,
      "eval_loss": 4.348100662231445,
      "eval_runtime": 68.2882,
      "eval_samples_per_second": 24.543,
      "eval_steps_per_second": 6.136,
      "eval_wer": 1.1233480176211454,
      "step": 300
    },
    {
      "epoch": 1.015228426395939,
      "grad_norm": 2.898040294647217,
      "learning_rate": 0.00025267379679144383,
      "loss": 3.7194,
      "step": 400
    },
    {
      "epoch": 1.015228426395939,
      "eval_loss": 3.3745944499969482,
      "eval_runtime": 68.4183,
      "eval_samples_per_second": 24.496,
      "eval_steps_per_second": 6.124,
      "eval_wer": 1.0120144173007608,
      "step": 400
    },
    {
      "epoch": 1.2690355329949239,
      "grad_norm": 2.0177152156829834,
      "learning_rate": 0.00023663101604278074,
      "loss": 3.3173,
      "step": 500
    },
    {
      "epoch": 1.2690355329949239,
      "eval_loss": 3.256007671356201,
      "eval_runtime": 68.7372,
      "eval_samples_per_second": 24.383,
      "eval_steps_per_second": 6.096,
      "eval_wer": 1.0060072086503804,
      "step": 500
    },
    {
      "epoch": 1.5228426395939088,
      "grad_norm": 3.2718145847320557,
      "learning_rate": 0.00022058823529411765,
      "loss": 3.1776,
      "step": 600
    },
    {
      "epoch": 1.5228426395939088,
      "eval_loss": 2.9593822956085205,
      "eval_runtime": 67.7629,
      "eval_samples_per_second": 24.733,
      "eval_steps_per_second": 6.183,
      "eval_wer": 1.0037378187157924,
      "step": 600
    },
    {
      "epoch": 1.7766497461928934,
      "grad_norm": 1.4685205221176147,
      "learning_rate": 0.0002045454545454545,
      "loss": 1.1178,
      "step": 700
    },
    {
      "epoch": 1.7766497461928934,
      "eval_loss": 0.2583008110523224,
      "eval_runtime": 68.257,
      "eval_samples_per_second": 24.554,
      "eval_steps_per_second": 6.139,
      "eval_wer": 0.48344680283006275,
      "step": 700
    },
    {
      "epoch": 2.030456852791878,
      "grad_norm": 0.8484176993370056,
      "learning_rate": 0.00018850267379679142,
      "loss": 0.4382,
      "step": 800
    },
    {
      "epoch": 2.030456852791878,
      "eval_loss": 0.23566803336143494,
      "eval_runtime": 68.4145,
      "eval_samples_per_second": 24.498,
      "eval_steps_per_second": 6.124,
      "eval_wer": 0.42964891202776667,
      "step": 800
    },
    {
      "epoch": 2.284263959390863,
      "grad_norm": 0.4717855155467987,
      "learning_rate": 0.0001724598930481283,
      "loss": 0.422,
      "step": 900
    },
    {
      "epoch": 2.284263959390863,
      "eval_loss": 0.23215773701667786,
      "eval_runtime": 69.371,
      "eval_samples_per_second": 24.16,
      "eval_steps_per_second": 6.04,
      "eval_wer": 0.43038312641836873,
      "step": 900
    },
    {
      "epoch": 2.5380710659898478,
      "grad_norm": 1.4339512586593628,
      "learning_rate": 0.00015641711229946522,
      "loss": 0.4101,
      "step": 1000
    },
    {
      "epoch": 2.5380710659898478,
      "eval_loss": 0.23107033967971802,
      "eval_runtime": 68.1286,
      "eval_samples_per_second": 24.601,
      "eval_steps_per_second": 6.15,
      "eval_wer": 0.4306501134694967,
      "step": 1000
    },
    {
      "epoch": 2.7918781725888326,
      "grad_norm": 0.7119982838630676,
      "learning_rate": 0.00014037433155080213,
      "loss": 0.3923,
      "step": 1100
    },
    {
      "epoch": 2.7918781725888326,
      "eval_loss": 0.23034636676311493,
      "eval_runtime": 67.7918,
      "eval_samples_per_second": 24.723,
      "eval_steps_per_second": 6.181,
      "eval_wer": 0.4159658256574556,
      "step": 1100
    },
    {
      "epoch": 3.045685279187817,
      "grad_norm": 0.410969614982605,
      "learning_rate": 0.00012433155080213902,
      "loss": 0.382,
      "step": 1200
    },
    {
      "epoch": 3.045685279187817,
      "eval_loss": 0.22485551238059998,
      "eval_runtime": 67.7668,
      "eval_samples_per_second": 24.732,
      "eval_steps_per_second": 6.183,
      "eval_wer": 0.40608730476571886,
      "step": 1200
    },
    {
      "epoch": 3.299492385786802,
      "grad_norm": 0.44749805331230164,
      "learning_rate": 0.00010828877005347593,
      "loss": 0.3799,
      "step": 1300
    },
    {
      "epoch": 3.299492385786802,
      "eval_loss": 0.22400638461112976,
      "eval_runtime": 69.3331,
      "eval_samples_per_second": 24.173,
      "eval_steps_per_second": 6.043,
      "eval_wer": 0.40174876518488856,
      "step": 1300
    },
    {
      "epoch": 3.553299492385787,
      "grad_norm": 0.48391538858413696,
      "learning_rate": 9.224598930481283e-05,
      "loss": 0.3733,
      "step": 1400
    },
    {
      "epoch": 3.553299492385787,
      "eval_loss": 0.2204192578792572,
      "eval_runtime": 68.018,
      "eval_samples_per_second": 24.641,
      "eval_steps_per_second": 6.16,
      "eval_wer": 0.3992123881991723,
      "step": 1400
    },
    {
      "epoch": 3.8071065989847717,
      "grad_norm": 0.7022409439086914,
      "learning_rate": 7.620320855614973e-05,
      "loss": 0.3757,
      "step": 1500
    },
    {
      "epoch": 3.8071065989847717,
      "eval_loss": 0.22058387100696564,
      "eval_runtime": 68.8893,
      "eval_samples_per_second": 24.329,
      "eval_steps_per_second": 6.082,
      "eval_wer": 0.40308370044052866,
      "step": 1500
    },
    {
      "epoch": 4.060913705583756,
      "grad_norm": 0.5409083366394043,
      "learning_rate": 6.016042780748663e-05,
      "loss": 0.3789,
      "step": 1600
    },
    {
      "epoch": 4.060913705583756,
      "eval_loss": 0.2191571146249771,
      "eval_runtime": 68.0976,
      "eval_samples_per_second": 24.612,
      "eval_steps_per_second": 6.153,
      "eval_wer": 0.4031504472033106,
      "step": 1600
    },
    {
      "epoch": 4.314720812182741,
      "grad_norm": 0.6483948230743408,
      "learning_rate": 4.4117647058823526e-05,
      "loss": 0.3635,
      "step": 1700
    },
    {
      "epoch": 4.314720812182741,
      "eval_loss": 0.21890130639076233,
      "eval_runtime": 68.9885,
      "eval_samples_per_second": 24.294,
      "eval_steps_per_second": 6.073,
      "eval_wer": 0.40515285008677077,
      "step": 1700
    },
    {
      "epoch": 4.568527918781726,
      "grad_norm": 0.45765629410743713,
      "learning_rate": 2.8074866310160424e-05,
      "loss": 0.3583,
      "step": 1800
    },
    {
      "epoch": 4.568527918781726,
      "eval_loss": 0.21793432533740997,
      "eval_runtime": 69.0917,
      "eval_samples_per_second": 24.258,
      "eval_steps_per_second": 6.064,
      "eval_wer": 0.3988786543852623,
      "step": 1800
    },
    {
      "epoch": 4.822335025380711,
      "grad_norm": 1.3359663486480713,
      "learning_rate": 1.2032085561497326e-05,
      "loss": 0.3733,
      "step": 1900
    },
    {
      "epoch": 4.822335025380711,
      "eval_loss": 0.21757769584655762,
      "eval_runtime": 68.6773,
      "eval_samples_per_second": 24.404,
      "eval_steps_per_second": 6.101,
      "eval_wer": 0.39680950473902016,
      "step": 1900
    },
    {
      "epoch": 5.0,
      "step": 1970,
      "total_flos": 1.9392924694598373e+19,
      "train_loss": 1.6902474505042062,
      "train_runtime": 4165.473,
      "train_samples_per_second": 7.561,
      "train_steps_per_second": 0.473
    }
  ],
  "logging_steps": 100,
  "max_steps": 1970,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 400,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 4,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.9392924694598373e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|