|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 200, |
|
"global_step": 9160, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2183406113537118, |
|
"eval_loss": 2.382188320159912, |
|
"eval_runtime": 74.685, |
|
"eval_samples_per_second": 13.617, |
|
"eval_steps_per_second": 1.714, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4366812227074236, |
|
"eval_loss": 0.27150478959083557, |
|
"eval_runtime": 75.1491, |
|
"eval_samples_per_second": 13.533, |
|
"eval_steps_per_second": 1.703, |
|
"eval_wer": 0.5093029350104822, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5458515283842795, |
|
"grad_norm": 0.9615474939346313, |
|
"learning_rate": 0.0002868211920529801, |
|
"loss": 2.7769, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6550218340611353, |
|
"eval_loss": 0.24891583621501923, |
|
"eval_runtime": 75.3374, |
|
"eval_samples_per_second": 13.499, |
|
"eval_steps_per_second": 1.699, |
|
"eval_wer": 0.4820492662473795, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8733624454148472, |
|
"eval_loss": 0.2296326756477356, |
|
"eval_runtime": 74.8314, |
|
"eval_samples_per_second": 13.591, |
|
"eval_steps_per_second": 1.711, |
|
"eval_wer": 0.46947064989517817, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.091703056768559, |
|
"grad_norm": 0.819220244884491, |
|
"learning_rate": 0.00027026490066225163, |
|
"loss": 0.6809, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.091703056768559, |
|
"eval_loss": 0.22090712189674377, |
|
"eval_runtime": 75.0858, |
|
"eval_samples_per_second": 13.544, |
|
"eval_steps_per_second": 1.705, |
|
"eval_wer": 0.4638364779874214, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3100436681222707, |
|
"eval_loss": 0.21630799770355225, |
|
"eval_runtime": 75.4504, |
|
"eval_samples_per_second": 13.479, |
|
"eval_steps_per_second": 1.696, |
|
"eval_wer": 0.44693396226415094, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5283842794759825, |
|
"eval_loss": 0.20916949212551117, |
|
"eval_runtime": 75.3429, |
|
"eval_samples_per_second": 13.498, |
|
"eval_steps_per_second": 1.699, |
|
"eval_wer": 0.4399895178197065, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.6375545851528384, |
|
"grad_norm": 0.9643642902374268, |
|
"learning_rate": 0.00025370860927152316, |
|
"loss": 0.6113, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.7467248908296944, |
|
"eval_loss": 0.20470178127288818, |
|
"eval_runtime": 74.8687, |
|
"eval_samples_per_second": 13.584, |
|
"eval_steps_per_second": 1.71, |
|
"eval_wer": 0.4346174004192872, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.965065502183406, |
|
"eval_loss": 0.20738434791564941, |
|
"eval_runtime": 75.3186, |
|
"eval_samples_per_second": 13.503, |
|
"eval_steps_per_second": 1.699, |
|
"eval_wer": 0.4466719077568134, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.183406113537118, |
|
"grad_norm": 0.6018229126930237, |
|
"learning_rate": 0.00023715231788079468, |
|
"loss": 0.5974, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.183406113537118, |
|
"eval_loss": 0.20408853888511658, |
|
"eval_runtime": 75.0651, |
|
"eval_samples_per_second": 13.548, |
|
"eval_steps_per_second": 1.705, |
|
"eval_wer": 0.43042452830188677, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.4017467248908297, |
|
"eval_loss": 0.20539002120494843, |
|
"eval_runtime": 74.9069, |
|
"eval_samples_per_second": 13.577, |
|
"eval_steps_per_second": 1.709, |
|
"eval_wer": 0.4317348008385744, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.6200873362445414, |
|
"eval_loss": 0.19870109856128693, |
|
"eval_runtime": 75.2079, |
|
"eval_samples_per_second": 13.523, |
|
"eval_steps_per_second": 1.702, |
|
"eval_wer": 0.4240041928721174, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.7292576419213974, |
|
"grad_norm": 0.7469688653945923, |
|
"learning_rate": 0.0002205960264900662, |
|
"loss": 0.5636, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.8384279475982535, |
|
"eval_loss": 0.20032186806201935, |
|
"eval_runtime": 75.3206, |
|
"eval_samples_per_second": 13.502, |
|
"eval_steps_per_second": 1.699, |
|
"eval_wer": 0.42518343815513626, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.056768558951965, |
|
"eval_loss": 0.19965218007564545, |
|
"eval_runtime": 74.7464, |
|
"eval_samples_per_second": 13.606, |
|
"eval_steps_per_second": 1.712, |
|
"eval_wer": 0.42872117400419285, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.2751091703056767, |
|
"grad_norm": 0.8557049036026001, |
|
"learning_rate": 0.00020403973509933773, |
|
"loss": 0.5398, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.2751091703056767, |
|
"eval_loss": 0.20972934365272522, |
|
"eval_runtime": 74.9379, |
|
"eval_samples_per_second": 13.571, |
|
"eval_steps_per_second": 1.708, |
|
"eval_wer": 0.4399895178197065, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.493449781659389, |
|
"eval_loss": 0.19680900871753693, |
|
"eval_runtime": 74.7045, |
|
"eval_samples_per_second": 13.614, |
|
"eval_steps_per_second": 1.713, |
|
"eval_wer": 0.4165356394129979, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.7117903930131004, |
|
"eval_loss": 0.20125041902065277, |
|
"eval_runtime": 74.9279, |
|
"eval_samples_per_second": 13.573, |
|
"eval_steps_per_second": 1.708, |
|
"eval_wer": 0.42177672955974843, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.8209606986899565, |
|
"grad_norm": 0.7604547739028931, |
|
"learning_rate": 0.00018748344370860925, |
|
"loss": 0.5334, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.930131004366812, |
|
"eval_loss": 0.20029041171073914, |
|
"eval_runtime": 74.9149, |
|
"eval_samples_per_second": 13.575, |
|
"eval_steps_per_second": 1.709, |
|
"eval_wer": 0.4229559748427673, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.148471615720524, |
|
"eval_loss": 0.19763976335525513, |
|
"eval_runtime": 75.2231, |
|
"eval_samples_per_second": 13.52, |
|
"eval_steps_per_second": 1.702, |
|
"eval_wer": 0.4226939203354298, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.366812227074236, |
|
"grad_norm": 1.0875004529953003, |
|
"learning_rate": 0.00017092715231788077, |
|
"loss": 0.5123, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.366812227074236, |
|
"eval_loss": 0.19775792956352234, |
|
"eval_runtime": 75.5854, |
|
"eval_samples_per_second": 13.455, |
|
"eval_steps_per_second": 1.693, |
|
"eval_wer": 0.419811320754717, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.585152838427947, |
|
"eval_loss": 0.201936736702919, |
|
"eval_runtime": 74.8299, |
|
"eval_samples_per_second": 13.591, |
|
"eval_steps_per_second": 1.711, |
|
"eval_wer": 0.429769392033543, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.8034934497816595, |
|
"eval_loss": 0.19385558366775513, |
|
"eval_runtime": 75.489, |
|
"eval_samples_per_second": 13.472, |
|
"eval_steps_per_second": 1.696, |
|
"eval_wer": 0.41457023060796644, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.9126637554585155, |
|
"grad_norm": 1.0764209032058716, |
|
"learning_rate": 0.0001543708609271523, |
|
"loss": 0.5119, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.021834061135372, |
|
"eval_loss": 0.1989189237356186, |
|
"eval_runtime": 75.1857, |
|
"eval_samples_per_second": 13.527, |
|
"eval_steps_per_second": 1.702, |
|
"eval_wer": 0.4161425576519916, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 5.240174672489083, |
|
"eval_loss": 0.1901673972606659, |
|
"eval_runtime": 75.3755, |
|
"eval_samples_per_second": 13.492, |
|
"eval_steps_per_second": 1.698, |
|
"eval_wer": 0.407625786163522, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.458515283842795, |
|
"grad_norm": 0.5494298338890076, |
|
"learning_rate": 0.00013781456953642382, |
|
"loss": 0.4929, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.458515283842795, |
|
"eval_loss": 0.1928589940071106, |
|
"eval_runtime": 75.5607, |
|
"eval_samples_per_second": 13.459, |
|
"eval_steps_per_second": 1.694, |
|
"eval_wer": 0.4115566037735849, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.676855895196507, |
|
"eval_loss": 0.19432277977466583, |
|
"eval_runtime": 75.57, |
|
"eval_samples_per_second": 13.458, |
|
"eval_steps_per_second": 1.694, |
|
"eval_wer": 0.4144392033542977, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.895196506550218, |
|
"eval_loss": 0.19218797981739044, |
|
"eval_runtime": 75.4593, |
|
"eval_samples_per_second": 13.477, |
|
"eval_steps_per_second": 1.696, |
|
"eval_wer": 0.4106394129979036, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.004366812227074, |
|
"grad_norm": 0.40776267647743225, |
|
"learning_rate": 0.00012125827814569536, |
|
"loss": 0.4878, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.11353711790393, |
|
"eval_loss": 0.19328303635120392, |
|
"eval_runtime": 76.1865, |
|
"eval_samples_per_second": 13.349, |
|
"eval_steps_per_second": 1.68, |
|
"eval_wer": 0.41365303983228513, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.331877729257642, |
|
"eval_loss": 0.19196276366710663, |
|
"eval_runtime": 75.3609, |
|
"eval_samples_per_second": 13.495, |
|
"eval_steps_per_second": 1.698, |
|
"eval_wer": 0.40579140461215935, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 6.550218340611353, |
|
"grad_norm": 0.5017375946044922, |
|
"learning_rate": 0.00010470198675496688, |
|
"loss": 0.4755, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.550218340611353, |
|
"eval_loss": 0.19270355999469757, |
|
"eval_runtime": 75.2917, |
|
"eval_samples_per_second": 13.507, |
|
"eval_steps_per_second": 1.7, |
|
"eval_wer": 0.41705974842767296, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.7685589519650655, |
|
"eval_loss": 0.19197432696819305, |
|
"eval_runtime": 75.4413, |
|
"eval_samples_per_second": 13.481, |
|
"eval_steps_per_second": 1.697, |
|
"eval_wer": 0.41273584905660377, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 6.986899563318778, |
|
"eval_loss": 0.19249393045902252, |
|
"eval_runtime": 75.5242, |
|
"eval_samples_per_second": 13.466, |
|
"eval_steps_per_second": 1.695, |
|
"eval_wer": 0.40605345911949686, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.096069868995633, |
|
"grad_norm": 0.8388169407844543, |
|
"learning_rate": 8.81456953642384e-05, |
|
"loss": 0.475, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.205240174672489, |
|
"eval_loss": 0.18842807412147522, |
|
"eval_runtime": 75.7212, |
|
"eval_samples_per_second": 13.431, |
|
"eval_steps_per_second": 1.69, |
|
"eval_wer": 0.40579140461215935, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 7.423580786026201, |
|
"eval_loss": 0.19032706320285797, |
|
"eval_runtime": 75.2911, |
|
"eval_samples_per_second": 13.508, |
|
"eval_steps_per_second": 1.7, |
|
"eval_wer": 0.40697064989517817, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 7.641921397379913, |
|
"grad_norm": 1.298710584640503, |
|
"learning_rate": 7.158940397350993e-05, |
|
"loss": 0.4715, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.641921397379913, |
|
"eval_loss": 0.18821676075458527, |
|
"eval_runtime": 75.1357, |
|
"eval_samples_per_second": 13.536, |
|
"eval_steps_per_second": 1.704, |
|
"eval_wer": 0.3996331236897275, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.860262008733624, |
|
"eval_loss": 0.1881488859653473, |
|
"eval_runtime": 75.7296, |
|
"eval_samples_per_second": 13.429, |
|
"eval_steps_per_second": 1.69, |
|
"eval_wer": 0.4033018867924528, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 8.078602620087336, |
|
"eval_loss": 0.1884731650352478, |
|
"eval_runtime": 75.2978, |
|
"eval_samples_per_second": 13.506, |
|
"eval_steps_per_second": 1.7, |
|
"eval_wer": 0.40068134171907754, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 8.187772925764191, |
|
"grad_norm": 0.7714540362358093, |
|
"learning_rate": 5.5033112582781456e-05, |
|
"loss": 0.4575, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 8.296943231441048, |
|
"eval_loss": 0.1885315328836441, |
|
"eval_runtime": 76.1765, |
|
"eval_samples_per_second": 13.351, |
|
"eval_steps_per_second": 1.68, |
|
"eval_wer": 0.4015985324947589, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 8.51528384279476, |
|
"eval_loss": 0.18875299394130707, |
|
"eval_runtime": 75.8312, |
|
"eval_samples_per_second": 13.411, |
|
"eval_steps_per_second": 1.688, |
|
"eval_wer": 0.40500524109014674, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 8.733624454148472, |
|
"grad_norm": 0.7762609124183655, |
|
"learning_rate": 3.850993377483443e-05, |
|
"loss": 0.4611, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.733624454148472, |
|
"eval_loss": 0.18837310373783112, |
|
"eval_runtime": 76.1442, |
|
"eval_samples_per_second": 13.356, |
|
"eval_steps_per_second": 1.681, |
|
"eval_wer": 0.40461215932914046, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.951965065502183, |
|
"eval_loss": 0.1881391853094101, |
|
"eval_runtime": 76.0237, |
|
"eval_samples_per_second": 13.377, |
|
"eval_steps_per_second": 1.684, |
|
"eval_wer": 0.39740566037735847, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 9.170305676855895, |
|
"eval_loss": 0.1864539086818695, |
|
"eval_runtime": 75.5164, |
|
"eval_samples_per_second": 13.467, |
|
"eval_steps_per_second": 1.695, |
|
"eval_wer": 0.3955712788259958, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 9.279475982532752, |
|
"grad_norm": 0.7670572400093079, |
|
"learning_rate": 2.1953642384105956e-05, |
|
"loss": 0.4559, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 9.388646288209607, |
|
"eval_loss": 0.18750794231891632, |
|
"eval_runtime": 75.8042, |
|
"eval_samples_per_second": 13.416, |
|
"eval_steps_per_second": 1.689, |
|
"eval_wer": 0.39740566037735847, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 9.606986899563319, |
|
"eval_loss": 0.1871640682220459, |
|
"eval_runtime": 75.5158, |
|
"eval_samples_per_second": 13.467, |
|
"eval_steps_per_second": 1.695, |
|
"eval_wer": 0.3996331236897275, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 9.825327510917031, |
|
"grad_norm": 0.47464126348495483, |
|
"learning_rate": 5.3973509933774825e-06, |
|
"loss": 0.4536, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 9.825327510917031, |
|
"eval_loss": 0.18756870925426483, |
|
"eval_runtime": 75.5452, |
|
"eval_samples_per_second": 13.462, |
|
"eval_steps_per_second": 1.694, |
|
"eval_wer": 0.3953092243186583, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 9160, |
|
"total_flos": 1.4294624233263563e+19, |
|
"train_loss": 0.6386700151268572, |
|
"train_runtime": 13601.4767, |
|
"train_samples_per_second": 5.387, |
|
"train_steps_per_second": 0.673 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4294624233263563e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|