{
  "best_metric": 0.38667929292929293,
  "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/large_model_output/large-sami-22k-finetuned/outputs/checkpoint-1080",
  "epoch": 60.0,
  "eval_steps": 500,
  "global_step": 64800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 0.5049565434455872,
      "learning_rate": 3.32716049382716e-05,
      "loss": 0.126,
      "step": 1080
    },
    {
      "epoch": 1.0,
      "eval_cer": 0.1485204762139999,
      "eval_loss": 0.4803544282913208,
      "eval_runtime": 50.5555,
      "eval_samples_per_second": 17.604,
      "eval_steps_per_second": 2.215,
      "eval_wer": 0.38667929292929293,
      "step": 1080
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.3093390464782715,
      "learning_rate": 6.660493827160493e-05,
      "loss": 0.1441,
      "step": 2160
    },
    {
      "epoch": 2.0,
      "eval_cer": 0.19503038087240035,
      "eval_loss": 0.6097356677055359,
      "eval_runtime": 49.9713,
      "eval_samples_per_second": 17.81,
      "eval_steps_per_second": 2.241,
      "eval_wer": 0.44239267676767674,
      "step": 2160
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.067934513092041,
      "learning_rate": 9.99074074074074e-05,
      "loss": 0.1675,
      "step": 3240
    },
    {
      "epoch": 3.0,
      "eval_cer": 0.16756409623079582,
      "eval_loss": 0.5237330198287964,
      "eval_runtime": 52.1692,
      "eval_samples_per_second": 17.06,
      "eval_steps_per_second": 2.147,
      "eval_wer": 0.444760101010101,
      "step": 3240
    },
    {
      "epoch": 4.0,
      "grad_norm": 6.190335750579834,
      "learning_rate": 0.00013324074074074074,
      "loss": 0.1919,
      "step": 4320
    },
    {
      "epoch": 4.0,
      "eval_cer": 0.188435508570864,
      "eval_loss": 0.6256272196769714,
      "eval_runtime": 49.8649,
      "eval_samples_per_second": 17.848,
      "eval_steps_per_second": 2.246,
      "eval_wer": 0.484375,
      "step": 4320
    },
    {
      "epoch": 5.0,
      "grad_norm": 10.29676342010498,
      "learning_rate": 0.0001665432098765432,
      "loss": 0.2168,
      "step": 5400
    },
    {
      "epoch": 5.0,
      "eval_cer": 0.1991552635478931,
      "eval_loss": 0.6817235946655273,
      "eval_runtime": 49.9056,
      "eval_samples_per_second": 17.834,
      "eval_steps_per_second": 2.244,
      "eval_wer": 0.5130997474747475,
      "step": 5400
    },
    {
      "epoch": 6.0,
      "grad_norm": 16.29789924621582,
      "learning_rate": 0.00019987654320987656,
      "loss": 0.2411,
      "step": 6480
    },
    {
      "epoch": 6.0,
      "eval_cer": 0.20411994269624067,
      "eval_loss": 0.6815704703330994,
      "eval_runtime": 47.2826,
      "eval_samples_per_second": 18.823,
      "eval_steps_per_second": 2.369,
      "eval_wer": 0.5233585858585859,
      "step": 6480
    },
    {
      "epoch": 7.0,
      "grad_norm": 13.96838665008545,
      "learning_rate": 0.000233179012345679,
      "loss": 0.2493,
      "step": 7560
    },
    {
      "epoch": 7.0,
      "eval_cer": 0.2558662253618535,
      "eval_loss": 0.8295482993125916,
      "eval_runtime": 47.3326,
      "eval_samples_per_second": 18.803,
      "eval_steps_per_second": 2.366,
      "eval_wer": 0.6788194444444444,
      "step": 7560
    },
    {
      "epoch": 8.0,
      "grad_norm": 12.302577018737793,
      "learning_rate": 0.0002665123456790123,
      "loss": 0.2718,
      "step": 8640
    },
    {
      "epoch": 8.0,
      "eval_cer": 0.2669070789902682,
      "eval_loss": 0.8849073648452759,
      "eval_runtime": 47.1385,
      "eval_samples_per_second": 18.881,
      "eval_steps_per_second": 2.376,
      "eval_wer": 0.6756628787878788,
      "step": 8640
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.21664512157440186,
      "learning_rate": 0.0002998148148148148,
      "loss": 0.2922,
      "step": 9720
    },
    {
      "epoch": 9.0,
      "eval_cer": 0.3401422714024601,
      "eval_loss": 1.0527104139328003,
      "eval_runtime": 49.2164,
      "eval_samples_per_second": 18.083,
      "eval_steps_per_second": 2.276,
      "eval_wer": 0.6721906565656566,
      "step": 9720
    },
    {
      "epoch": 10.0,
      "grad_norm": 7.9486260414123535,
      "learning_rate": 0.0003331172839506173,
      "loss": 0.3156,
      "step": 10800
    },
    {
      "epoch": 10.0,
      "eval_cer": 0.35758039816232773,
      "eval_loss": 1.0661259889602661,
      "eval_runtime": 48.5028,
      "eval_samples_per_second": 18.349,
      "eval_steps_per_second": 2.309,
      "eval_wer": 0.7528409090909091,
      "step": 10800
    },
    {
      "epoch": 11.0,
      "grad_norm": 8.1552095413208,
      "learning_rate": 0.0003664506172839506,
      "loss": 0.3273,
      "step": 11880
    },
    {
      "epoch": 11.0,
      "eval_cer": 0.2929654695450279,
      "eval_loss": 1.0082694292068481,
      "eval_runtime": 48.9797,
      "eval_samples_per_second": 18.171,
      "eval_steps_per_second": 2.287,
      "eval_wer": 0.7840909090909091,
      "step": 11880
    },
    {
      "epoch": 12.0,
      "grad_norm": 8.20614242553711,
      "learning_rate": 0.00039978395061728396,
      "loss": 0.3216,
      "step": 12960
    },
    {
      "epoch": 12.0,
      "eval_cer": 0.3153682754532431,
      "eval_loss": 1.130453109741211,
      "eval_runtime": 48.376,
      "eval_samples_per_second": 18.398,
      "eval_steps_per_second": 2.315,
      "eval_wer": 0.728219696969697,
      "step": 12960
    },
    {
      "epoch": 13.0,
      "grad_norm": 14.636846542358398,
      "learning_rate": 0.00043311728395061726,
      "loss": 0.3498,
      "step": 14040
    },
    {
      "epoch": 13.0,
      "eval_cer": 0.3106258953712394,
      "eval_loss": 1.0758916139602661,
      "eval_runtime": 48.0575,
      "eval_samples_per_second": 18.519,
      "eval_steps_per_second": 2.331,
      "eval_wer": 0.7312184343434344,
      "step": 14040
    },
    {
      "epoch": 14.0,
      "grad_norm": 4.806951999664307,
      "learning_rate": 0.0004664506172839506,
      "loss": 0.3553,
      "step": 15120
    },
    {
      "epoch": 14.0,
      "eval_cer": 0.28031912265968484,
      "eval_loss": 0.8731944561004639,
      "eval_runtime": 47.2505,
      "eval_samples_per_second": 18.836,
      "eval_steps_per_second": 2.37,
      "eval_wer": 0.6756628787878788,
      "step": 15120
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.5450202822685242,
      "learning_rate": 0.0004997530864197531,
      "loss": 0.3582,
      "step": 16200
    },
    {
      "epoch": 15.0,
      "eval_cer": 0.31852986217457885,
      "eval_loss": 1.055077075958252,
      "eval_runtime": 46.8181,
      "eval_samples_per_second": 19.01,
      "eval_steps_per_second": 2.392,
      "eval_wer": 0.7623106060606061,
      "step": 16200
    },
    {
      "epoch": 16.0,
      "grad_norm": 5.1030144691467285,
      "learning_rate": 0.0004889814814814815,
      "loss": 0.3607,
      "step": 17280
    },
    {
      "epoch": 16.0,
      "eval_cer": 0.3101071975497703,
      "eval_loss": 1.0534826517105103,
      "eval_runtime": 47.5102,
      "eval_samples_per_second": 18.733,
      "eval_steps_per_second": 2.357,
      "eval_wer": 0.7482638888888888,
      "step": 17280
    },
    {
      "epoch": 17.0,
      "grad_norm": 0.22218887507915497,
      "learning_rate": 0.0004778703703703704,
      "loss": 0.3447,
      "step": 18360
    },
    {
      "epoch": 17.0,
      "eval_cer": 0.30813120584893544,
      "eval_loss": 1.064017415046692,
      "eval_runtime": 48.3671,
      "eval_samples_per_second": 18.401,
      "eval_steps_per_second": 2.316,
      "eval_wer": 0.7369002525252525,
      "step": 18360
    },
    {
      "epoch": 18.0,
      "grad_norm": 0.14536279439926147,
      "learning_rate": 0.00046675925925925926,
      "loss": 0.325,
      "step": 19440
    },
    {
      "epoch": 18.0,
      "eval_cer": 0.2905448797115052,
      "eval_loss": 1.0327048301696777,
      "eval_runtime": 48.9592,
      "eval_samples_per_second": 18.178,
      "eval_steps_per_second": 2.288,
      "eval_wer": 0.7534722222222222,
      "step": 19440
    },
    {
      "epoch": 19.0,
      "grad_norm": 1.5726815462112427,
      "learning_rate": 0.00045564814814814817,
      "loss": 0.3022,
      "step": 20520
    },
    {
      "epoch": 19.0,
      "eval_cer": 0.2886923874919725,
      "eval_loss": 0.9869930148124695,
      "eval_runtime": 49.3541,
      "eval_samples_per_second": 18.033,
      "eval_steps_per_second": 2.269,
      "eval_wer": 0.7231691919191919,
      "step": 20520
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.41919103264808655,
      "learning_rate": 0.00044454732510288065,
      "loss": 0.2825,
      "step": 21600
    },
    {
      "epoch": 20.0,
      "eval_cer": 0.28056612162228917,
      "eval_loss": 0.9183225035667419,
      "eval_runtime": 49.2359,
      "eval_samples_per_second": 18.076,
      "eval_steps_per_second": 2.275,
      "eval_wer": 0.686395202020202,
      "step": 21600
    },
    {
      "epoch": 21.0,
      "grad_norm": 12.236234664916992,
      "learning_rate": 0.0004334362139917696,
      "loss": 0.2706,
      "step": 22680
    },
    {
      "epoch": 21.0,
      "eval_cer": 0.28604949859210593,
      "eval_loss": 0.9366316795349121,
      "eval_runtime": 49.1391,
      "eval_samples_per_second": 18.112,
      "eval_steps_per_second": 2.279,
      "eval_wer": 0.6811868686868687,
      "step": 22680
    },
    {
      "epoch": 22.0,
      "grad_norm": 4.797195911407471,
      "learning_rate": 0.0004223353909465021,
      "loss": 0.2507,
      "step": 23760
    },
    {
      "epoch": 22.0,
      "eval_cer": 0.2608062046139406,
      "eval_loss": 0.9585080146789551,
      "eval_runtime": 48.7093,
      "eval_samples_per_second": 18.272,
      "eval_steps_per_second": 2.299,
      "eval_wer": 0.6941287878787878,
      "step": 23760
    },
    {
      "epoch": 23.0,
      "grad_norm": 4.625443935394287,
      "learning_rate": 0.00041122427983539094,
      "loss": 0.237,
      "step": 24840
    },
    {
      "epoch": 23.0,
      "eval_cer": 0.28024502297090353,
      "eval_loss": 1.010016918182373,
      "eval_runtime": 50.1358,
      "eval_samples_per_second": 17.752,
      "eval_steps_per_second": 2.234,
      "eval_wer": 0.6797664141414141,
      "step": 24840
    },
    {
      "epoch": 24.0,
      "grad_norm": 0.49481087923049927,
      "learning_rate": 0.00040011316872427984,
      "loss": 0.2298,
      "step": 25920
    },
    {
      "epoch": 24.0,
      "eval_cer": 0.24492417131848046,
      "eval_loss": 0.9184597730636597,
      "eval_runtime": 48.7455,
      "eval_samples_per_second": 18.258,
      "eval_steps_per_second": 2.298,
      "eval_wer": 0.6349431818181818,
      "step": 25920
    },
    {
      "epoch": 25.0,
      "grad_norm": 1.7336276769638062,
      "learning_rate": 0.0003890123456790123,
      "loss": 0.221,
      "step": 27000
    },
    {
      "epoch": 25.0,
      "eval_cer": 0.27846663044015213,
      "eval_loss": 0.9352790713310242,
      "eval_runtime": 48.8906,
      "eval_samples_per_second": 18.204,
      "eval_steps_per_second": 2.291,
      "eval_wer": 0.6579861111111112,
      "step": 27000
    },
    {
      "epoch": 26.0,
      "grad_norm": 0.02212027832865715,
      "learning_rate": 0.0003779012345679013,
      "loss": 0.2052,
      "step": 28080
    },
    {
      "epoch": 26.0,
      "eval_cer": 0.2507039470434224,
      "eval_loss": 0.8651528358459473,
      "eval_runtime": 49.0769,
      "eval_samples_per_second": 18.135,
      "eval_steps_per_second": 2.282,
      "eval_wer": 0.6493055555555556,
      "step": 28080
    },
    {
      "epoch": 27.0,
      "grad_norm": 2.215277910232544,
      "learning_rate": 0.0003667901234567901,
      "loss": 0.1928,
      "step": 29160
    },
    {
      "epoch": 27.0,
      "eval_cer": 0.2630785950699007,
      "eval_loss": 0.8858852386474609,
      "eval_runtime": 49.657,
      "eval_samples_per_second": 17.923,
      "eval_steps_per_second": 2.255,
      "eval_wer": 0.6775568181818182,
      "step": 29160
    },
    {
      "epoch": 28.0,
      "grad_norm": 0.10988181829452515,
      "learning_rate": 0.000355679012345679,
      "loss": 0.1889,
      "step": 30240
    },
    {
      "epoch": 28.0,
      "eval_cer": 0.2666353801314034,
      "eval_loss": 0.9239539504051208,
      "eval_runtime": 49.2302,
      "eval_samples_per_second": 18.078,
      "eval_steps_per_second": 2.275,
      "eval_wer": 0.6636679292929293,
      "step": 30240
    },
    {
      "epoch": 29.0,
      "grad_norm": 0.5829525589942932,
      "learning_rate": 0.0003445781893004115,
      "loss": 0.1771,
      "step": 31320
    },
    {
      "epoch": 29.0,
      "eval_cer": 0.24934545274909845,
      "eval_loss": 0.9042806625366211,
      "eval_runtime": 52.6225,
      "eval_samples_per_second": 16.913,
      "eval_steps_per_second": 2.128,
      "eval_wer": 0.6256313131313131,
      "step": 31320
    },
    {
      "epoch": 30.0,
      "grad_norm": 3.2479238510131836,
      "learning_rate": 0.00033346707818930046,
      "loss": 0.163,
      "step": 32400
    },
    {
      "epoch": 30.0,
      "eval_cer": 0.26213999901200413,
      "eval_loss": 0.9130964875221252,
      "eval_runtime": 50.9345,
      "eval_samples_per_second": 17.473,
      "eval_steps_per_second": 2.199,
      "eval_wer": 0.6504103535353535,
      "step": 32400
    },
    {
      "epoch": 31.0,
      "grad_norm": 2.047846555709839,
      "learning_rate": 0.0003223559670781893,
      "loss": 0.1603,
      "step": 33480
    },
    {
      "epoch": 31.0,
      "eval_cer": 0.24055228968038334,
      "eval_loss": 0.8102329969406128,
      "eval_runtime": 50.6115,
      "eval_samples_per_second": 17.585,
      "eval_steps_per_second": 2.213,
      "eval_wer": 0.6319444444444444,
      "step": 33480
    },
    {
      "epoch": 32.0,
      "grad_norm": 0.3893296420574188,
      "learning_rate": 0.0003112448559670782,
      "loss": 0.1447,
      "step": 34560
    },
    {
      "epoch": 32.0,
      "eval_cer": 0.2447512720446574,
      "eval_loss": 0.9245155453681946,
      "eval_runtime": 51.908,
      "eval_samples_per_second": 17.146,
      "eval_steps_per_second": 2.158,
      "eval_wer": 0.6336805555555556,
      "step": 34560
    },
    {
      "epoch": 33.0,
      "grad_norm": 2.6302273273468018,
      "learning_rate": 0.0003001440329218107,
      "loss": 0.1418,
      "step": 35640
    },
    {
      "epoch": 33.0,
      "eval_cer": 0.25300103739564295,
      "eval_loss": 0.9590283632278442,
      "eval_runtime": 52.0031,
      "eval_samples_per_second": 17.114,
      "eval_steps_per_second": 2.154,
      "eval_wer": 0.6235795454545454,
      "step": 35640
    },
    {
      "epoch": 34.0,
      "grad_norm": 3.61879301071167,
      "learning_rate": 0.0002890432098765432,
      "loss": 0.1415,
      "step": 36720
    },
    {
      "epoch": 34.0,
      "eval_cer": 0.2578916168552092,
      "eval_loss": 0.92754727602005,
      "eval_runtime": 52.0318,
      "eval_samples_per_second": 17.105,
      "eval_steps_per_second": 2.153,
      "eval_wer": 0.634469696969697,
      "step": 36720
    },
    {
      "epoch": 35.0,
      "grad_norm": 6.908621311187744,
      "learning_rate": 0.00027793209876543213,
      "loss": 0.1313,
      "step": 37800
    },
    {
      "epoch": 35.0,
      "eval_cer": 0.24981475077804674,
      "eval_loss": 0.8644362688064575,
      "eval_runtime": 53.8225,
      "eval_samples_per_second": 16.536,
      "eval_steps_per_second": 2.081,
      "eval_wer": 0.6279987373737373,
      "step": 37800
    },
    {
      "epoch": 36.0,
      "grad_norm": 2.5687201023101807,
      "learning_rate": 0.000266820987654321,
      "loss": 0.1285,
      "step": 38880
    },
    {
      "epoch": 36.0,
      "eval_cer": 0.26505458677073557,
      "eval_loss": 0.9070570468902588,
      "eval_runtime": 55.322,
      "eval_samples_per_second": 16.088,
      "eval_steps_per_second": 2.025,
      "eval_wer": 0.625,
      "step": 38880
    },
    {
      "epoch": 37.0,
      "grad_norm": 0.1792680323123932,
      "learning_rate": 0.0002557098765432099,
      "loss": 0.1204,
      "step": 39960
    },
    {
      "epoch": 37.0,
      "eval_cer": 0.2386503976683298,
      "eval_loss": 0.8658037185668945,
      "eval_runtime": 54.276,
      "eval_samples_per_second": 16.398,
      "eval_steps_per_second": 2.064,
      "eval_wer": 0.6092171717171717,
      "step": 39960
    },
    {
      "epoch": 38.0,
      "grad_norm": 0.05945800244808197,
      "learning_rate": 0.0002445987654320988,
      "loss": 0.1116,
      "step": 41040
    },
    {
      "epoch": 38.0,
      "eval_cer": 0.24588746727263747,
      "eval_loss": 0.8684060573577881,
      "eval_runtime": 55.9431,
      "eval_samples_per_second": 15.909,
      "eval_steps_per_second": 2.002,
      "eval_wer": 0.6267361111111112,
      "step": 41040
    },
    {
      "epoch": 39.0,
      "grad_norm": 2.164262056350708,
      "learning_rate": 0.00023349794238683127,
      "loss": 0.102,
      "step": 42120
    },
    {
      "epoch": 39.0,
      "eval_cer": 0.24102158770933163,
      "eval_loss": 0.9792320728302002,
      "eval_runtime": 54.7942,
      "eval_samples_per_second": 16.243,
      "eval_steps_per_second": 2.044,
      "eval_wer": 0.6245265151515151,
      "step": 42120
    },
    {
      "epoch": 40.0,
      "grad_norm": 7.841192722320557,
      "learning_rate": 0.00022238683127572017,
      "loss": 0.0966,
      "step": 43200
    },
    {
      "epoch": 40.0,
      "eval_cer": 0.2466037642641901,
      "eval_loss": 0.8880752325057983,
      "eval_runtime": 57.0632,
      "eval_samples_per_second": 15.597,
      "eval_steps_per_second": 1.963,
      "eval_wer": 0.6163194444444444,
      "step": 43200
    },
    {
      "epoch": 41.0,
      "grad_norm": 0.5480403304100037,
      "learning_rate": 0.00021128600823045268,
      "loss": 0.0934,
      "step": 44280
    },
    {
      "epoch": 41.0,
      "eval_cer": 0.23398211727510745,
      "eval_loss": 0.8669174909591675,
      "eval_runtime": 56.5233,
      "eval_samples_per_second": 15.746,
      "eval_steps_per_second": 1.981,
      "eval_wer": 0.5970643939393939,
      "step": 44280
    },
    {
      "epoch": 42.0,
      "grad_norm": 2.996035099029541,
      "learning_rate": 0.00020017489711934155,
      "loss": 0.0847,
      "step": 45360
    },
    {
      "epoch": 42.0,
      "eval_cer": 0.2370696043076619,
      "eval_loss": 0.9717867970466614,
      "eval_runtime": 55.4728,
      "eval_samples_per_second": 16.044,
      "eval_steps_per_second": 2.019,
      "eval_wer": 0.6207386363636364,
      "step": 45360
    },
    {
      "epoch": 43.0,
      "grad_norm": 0.41690441966056824,
      "learning_rate": 0.00018907407407407406,
      "loss": 0.0828,
      "step": 46440
    },
    {
      "epoch": 43.0,
      "eval_cer": 0.2392925949711011,
      "eval_loss": 0.957336962223053,
      "eval_runtime": 54.9772,
      "eval_samples_per_second": 16.189,
      "eval_steps_per_second": 2.037,
      "eval_wer": 0.6223169191919192,
      "step": 46440
    },
    {
      "epoch": 44.0,
      "grad_norm": 0.07533986121416092,
      "learning_rate": 0.0001779732510288066,
      "loss": 0.0727,
      "step": 47520
    },
    {
      "epoch": 44.0,
      "eval_cer": 0.2357605098058588,
      "eval_loss": 0.9871988892555237,
      "eval_runtime": 57.6886,
      "eval_samples_per_second": 15.428,
      "eval_steps_per_second": 1.941,
      "eval_wer": 0.6096906565656566,
      "step": 47520
    },
    {
      "epoch": 45.0,
      "grad_norm": 0.7598063945770264,
      "learning_rate": 0.00016686213991769547,
      "loss": 0.0701,
      "step": 48600
    },
    {
      "epoch": 45.0,
      "eval_cer": 0.24457837277083436,
      "eval_loss": 0.9421331882476807,
      "eval_runtime": 55.063,
      "eval_samples_per_second": 16.163,
      "eval_steps_per_second": 2.034,
      "eval_wer": 0.6115845959595959,
      "step": 48600
    },
    {
      "epoch": 46.0,
      "grad_norm": 0.43303415179252625,
      "learning_rate": 0.00015575102880658438,
      "loss": 0.0648,
      "step": 49680
    },
    {
      "epoch": 46.0,
      "eval_cer": 0.24672726374549228,
      "eval_loss": 0.9590614438056946,
      "eval_runtime": 57.1789,
      "eval_samples_per_second": 15.565,
      "eval_steps_per_second": 1.959,
      "eval_wer": 0.6043244949494949,
      "step": 49680
    },
    {
      "epoch": 47.0,
      "grad_norm": 6.171388626098633,
      "learning_rate": 0.00014463991769547325,
      "loss": 0.0634,
      "step": 50760
    },
    {
      "epoch": 47.0,
      "eval_cer": 0.23551351084325445,
      "eval_loss": 0.9990620017051697,
      "eval_runtime": 55.5622,
      "eval_samples_per_second": 16.018,
      "eval_steps_per_second": 2.016,
      "eval_wer": 0.6109532828282829,
      "step": 50760
    },
    {
      "epoch": 48.0,
      "grad_norm": 0.05001814663410187,
      "learning_rate": 0.0001335390946502058,
      "loss": 0.0573,
      "step": 51840
    },
    {
      "epoch": 48.0,
      "eval_cer": 0.23452551499283703,
      "eval_loss": 0.9873119592666626,
      "eval_runtime": 55.0833,
      "eval_samples_per_second": 16.157,
      "eval_steps_per_second": 2.033,
      "eval_wer": 0.6054292929292929,
      "step": 51840
    },
    {
      "epoch": 49.0,
      "grad_norm": 3.651003360748291,
      "learning_rate": 0.00012242798353909466,
      "loss": 0.0527,
      "step": 52920
    },
    {
      "epoch": 49.0,
      "eval_cer": 0.23247542360322088,
      "eval_loss": 0.9885514974594116,
      "eval_runtime": 52.5162,
      "eval_samples_per_second": 16.947,
      "eval_steps_per_second": 2.133,
      "eval_wer": 0.5935921717171717,
      "step": 52920
    },
    {
      "epoch": 50.0,
      "grad_norm": 3.5055177211761475,
      "learning_rate": 0.00011131687242798354,
      "loss": 0.0506,
      "step": 54000
    },
    {
      "epoch": 50.0,
      "eval_cer": 0.22867163957911377,
      "eval_loss": 1.0199133157730103,
      "eval_runtime": 51.406,
      "eval_samples_per_second": 17.313,
      "eval_steps_per_second": 2.179,
      "eval_wer": 0.5940656565656566,
      "step": 54000
    },
    {
      "epoch": 51.0,
      "grad_norm": 0.08695941418409348,
      "learning_rate": 0.00010020576131687243,
      "loss": 0.0486,
      "step": 55080
    },
    {
      "epoch": 51.0,
      "eval_cer": 0.22634984933063282,
      "eval_loss": 1.0691256523132324,
      "eval_runtime": 54.2523,
      "eval_samples_per_second": 16.405,
      "eval_steps_per_second": 2.064,
      "eval_wer": 0.5880681818181818,
      "step": 55080
    },
    {
      "epoch": 52.0,
      "grad_norm": 0.4256766438484192,
      "learning_rate": 8.909465020576133e-05,
      "loss": 0.0447,
      "step": 56160
    },
    {
      "epoch": 52.0,
      "eval_cer": 0.22963493553327077,
      "eval_loss": 1.0140999555587769,
      "eval_runtime": 58.925,
      "eval_samples_per_second": 15.104,
      "eval_steps_per_second": 1.901,
      "eval_wer": 0.5893308080808081,
      "step": 56160
    },
    {
      "epoch": 53.0,
      "grad_norm": 3.884925365447998,
      "learning_rate": 7.799382716049382e-05,
      "loss": 0.0419,
      "step": 57240
    },
    {
      "epoch": 53.0,
      "eval_cer": 0.2279306426913007,
      "eval_loss": 1.0658098459243774,
      "eval_runtime": 50.8901,
      "eval_samples_per_second": 17.489,
      "eval_steps_per_second": 2.201,
      "eval_wer": 0.5872790404040404,
      "step": 57240
    },
    {
      "epoch": 54.0,
      "grad_norm": 0.5678676962852478,
      "learning_rate": 6.690329218106995e-05,
      "loss": 0.0376,
      "step": 58320
    },
    {
      "epoch": 54.0,
      "eval_cer": 0.2253618534802154,
      "eval_loss": 1.144079327583313,
      "eval_runtime": 52.3564,
      "eval_samples_per_second": 16.999,
      "eval_steps_per_second": 2.139,
      "eval_wer": 0.5888573232323232,
      "step": 58320
    },
    {
      "epoch": 55.0,
      "grad_norm": 1.0211379528045654,
      "learning_rate": 5.579218106995885e-05,
      "loss": 0.0355,
      "step": 59400
    },
    {
      "epoch": 55.0,
      "eval_cer": 0.22486785555500666,
      "eval_loss": 1.146174430847168,
      "eval_runtime": 50.8316,
      "eval_samples_per_second": 17.509,
      "eval_steps_per_second": 2.203,
      "eval_wer": 0.5880681818181818,
      "step": 59400
    },
    {
      "epoch": 56.0,
      "grad_norm": 0.02778603509068489,
      "learning_rate": 4.468106995884774e-05,
      "loss": 0.0335,
      "step": 60480
    },
    {
      "epoch": 56.0,
      "eval_cer": 0.22442325742231883,
      "eval_loss": 1.1712491512298584,
      "eval_runtime": 51.7561,
      "eval_samples_per_second": 17.196,
      "eval_steps_per_second": 2.164,
      "eval_wer": 0.5860164141414141,
      "step": 60480
    },
    {
      "epoch": 57.0,
      "grad_norm": 0.13397055864334106,
      "learning_rate": 3.3569958847736626e-05,
      "loss": 0.0296,
      "step": 61560
    },
    {
      "epoch": 57.0,
      "eval_cer": 0.22180506841871264,
      "eval_loss": 1.162169337272644,
      "eval_runtime": 51.0452,
      "eval_samples_per_second": 17.436,
      "eval_steps_per_second": 2.194,
      "eval_wer": 0.5785984848484849,
      "step": 61560
    },
    {
      "epoch": 58.0,
      "grad_norm": 0.001944132731296122,
      "learning_rate": 2.246913580246914e-05,
      "loss": 0.0301,
      "step": 62640
    },
    {
      "epoch": 58.0,
      "eval_cer": 0.22350936126068272,
      "eval_loss": 1.170377492904663,
      "eval_runtime": 50.364,
      "eval_samples_per_second": 17.671,
      "eval_steps_per_second": 2.224,
      "eval_wer": 0.5839646464646465,
      "step": 62640
    },
    {
      "epoch": 59.0,
      "grad_norm": 0.18270032107830048,
      "learning_rate": 1.1358024691358025e-05,
      "loss": 0.0283,
      "step": 63720
    },
    {
      "epoch": 59.0,
      "eval_cer": 0.22133577038976437,
      "eval_loss": 1.1973356008529663,
      "eval_runtime": 50.9914,
      "eval_samples_per_second": 17.454,
      "eval_steps_per_second": 2.196,
      "eval_wer": 0.5804924242424242,
      "step": 63720
    },
    {
      "epoch": 60.0,
      "grad_norm": 0.00017149873019661754,
      "learning_rate": 2.469135802469136e-07,
      "loss": 0.0245,
      "step": 64800
    },
    {
      "epoch": 60.0,
      "eval_cer": 0.2198290767178778,
      "eval_loss": 1.1907662153244019,
      "eval_runtime": 51.7658,
      "eval_samples_per_second": 17.193,
      "eval_steps_per_second": 2.164,
      "eval_wer": 0.5762310606060606,
      "step": 64800
    },
    {
      "epoch": 60.0,
      "step": 64800,
      "total_flos": 1.8440987587856836e+20,
      "train_loss": 0.1667554270485301,
      "train_runtime": 81955.137,
      "train_samples_per_second": 12.641,
      "train_steps_per_second": 0.791
    }
  ],
  "logging_steps": 500,
  "max_steps": 64800,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 60,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.8440987587856836e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}