{ "best_metric": 0.38667929292929293, "best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/large_model_output/large-sami-22k-finetuned/outputs/checkpoint-1080", "epoch": 60.0, "eval_steps": 500, "global_step": 64800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.5049565434455872, "learning_rate": 3.32716049382716e-05, "loss": 0.126, "step": 1080 }, { "epoch": 1.0, "eval_cer": 0.1485204762139999, "eval_loss": 0.4803544282913208, "eval_runtime": 50.5555, "eval_samples_per_second": 17.604, "eval_steps_per_second": 2.215, "eval_wer": 0.38667929292929293, "step": 1080 }, { "epoch": 2.0, "grad_norm": 2.3093390464782715, "learning_rate": 6.660493827160493e-05, "loss": 0.1441, "step": 2160 }, { "epoch": 2.0, "eval_cer": 0.19503038087240035, "eval_loss": 0.6097356677055359, "eval_runtime": 49.9713, "eval_samples_per_second": 17.81, "eval_steps_per_second": 2.241, "eval_wer": 0.44239267676767674, "step": 2160 }, { "epoch": 3.0, "grad_norm": 3.067934513092041, "learning_rate": 9.99074074074074e-05, "loss": 0.1675, "step": 3240 }, { "epoch": 3.0, "eval_cer": 0.16756409623079582, "eval_loss": 0.5237330198287964, "eval_runtime": 52.1692, "eval_samples_per_second": 17.06, "eval_steps_per_second": 2.147, "eval_wer": 0.444760101010101, "step": 3240 }, { "epoch": 4.0, "grad_norm": 6.190335750579834, "learning_rate": 0.00013324074074074074, "loss": 0.1919, "step": 4320 }, { "epoch": 4.0, "eval_cer": 0.188435508570864, "eval_loss": 0.6256272196769714, "eval_runtime": 49.8649, "eval_samples_per_second": 17.848, "eval_steps_per_second": 2.246, "eval_wer": 0.484375, "step": 4320 }, { "epoch": 5.0, "grad_norm": 10.29676342010498, "learning_rate": 0.0001665432098765432, "loss": 0.2168, "step": 5400 }, { "epoch": 5.0, "eval_cer": 0.1991552635478931, "eval_loss": 0.6817235946655273, "eval_runtime": 49.9056, "eval_samples_per_second": 17.834, "eval_steps_per_second": 2.244, "eval_wer": 0.5130997474747475, "step": 5400 }, { "epoch": 6.0, "grad_norm": 16.29789924621582, "learning_rate": 0.00019987654320987656, "loss": 0.2411, "step": 6480 }, { "epoch": 6.0, "eval_cer": 0.20411994269624067, "eval_loss": 0.6815704703330994, "eval_runtime": 47.2826, "eval_samples_per_second": 18.823, "eval_steps_per_second": 2.369, "eval_wer": 0.5233585858585859, "step": 6480 }, { "epoch": 7.0, "grad_norm": 13.96838665008545, "learning_rate": 0.000233179012345679, "loss": 0.2493, "step": 7560 }, { "epoch": 7.0, "eval_cer": 0.2558662253618535, "eval_loss": 0.8295482993125916, "eval_runtime": 47.3326, "eval_samples_per_second": 18.803, "eval_steps_per_second": 2.366, "eval_wer": 0.6788194444444444, "step": 7560 }, { "epoch": 8.0, "grad_norm": 12.302577018737793, "learning_rate": 0.0002665123456790123, "loss": 0.2718, "step": 8640 }, { "epoch": 8.0, "eval_cer": 0.2669070789902682, "eval_loss": 0.8849073648452759, "eval_runtime": 47.1385, "eval_samples_per_second": 18.881, "eval_steps_per_second": 2.376, "eval_wer": 0.6756628787878788, "step": 8640 }, { "epoch": 9.0, "grad_norm": 0.21664512157440186, "learning_rate": 0.0002998148148148148, "loss": 0.2922, "step": 9720 }, { "epoch": 9.0, "eval_cer": 0.3401422714024601, "eval_loss": 1.0527104139328003, "eval_runtime": 49.2164, "eval_samples_per_second": 18.083, "eval_steps_per_second": 2.276, "eval_wer": 0.6721906565656566, "step": 9720 }, { "epoch": 10.0, "grad_norm": 7.9486260414123535, "learning_rate": 0.0003331172839506173, "loss": 0.3156, "step": 10800 }, { "epoch": 10.0, "eval_cer": 0.35758039816232773, "eval_loss": 1.0661259889602661, "eval_runtime": 48.5028, "eval_samples_per_second": 18.349, "eval_steps_per_second": 2.309, "eval_wer": 0.7528409090909091, "step": 10800 }, { "epoch": 11.0, "grad_norm": 8.1552095413208, "learning_rate": 0.0003664506172839506, "loss": 0.3273, "step": 11880 }, { "epoch": 11.0, "eval_cer": 0.2929654695450279, "eval_loss": 1.0082694292068481, "eval_runtime": 48.9797, "eval_samples_per_second": 18.171, "eval_steps_per_second": 2.287, "eval_wer": 0.7840909090909091, "step": 11880 }, { "epoch": 12.0, "grad_norm": 8.20614242553711, "learning_rate": 0.00039978395061728396, "loss": 0.3216, "step": 12960 }, { "epoch": 12.0, "eval_cer": 0.3153682754532431, "eval_loss": 1.130453109741211, "eval_runtime": 48.376, "eval_samples_per_second": 18.398, "eval_steps_per_second": 2.315, "eval_wer": 0.728219696969697, "step": 12960 }, { "epoch": 13.0, "grad_norm": 14.636846542358398, "learning_rate": 0.00043311728395061726, "loss": 0.3498, "step": 14040 }, { "epoch": 13.0, "eval_cer": 0.3106258953712394, "eval_loss": 1.0758916139602661, "eval_runtime": 48.0575, "eval_samples_per_second": 18.519, "eval_steps_per_second": 2.331, "eval_wer": 0.7312184343434344, "step": 14040 }, { "epoch": 14.0, "grad_norm": 4.806951999664307, "learning_rate": 0.0004664506172839506, "loss": 0.3553, "step": 15120 }, { "epoch": 14.0, "eval_cer": 0.28031912265968484, "eval_loss": 0.8731944561004639, "eval_runtime": 47.2505, "eval_samples_per_second": 18.836, "eval_steps_per_second": 2.37, "eval_wer": 0.6756628787878788, "step": 15120 }, { "epoch": 15.0, "grad_norm": 0.5450202822685242, "learning_rate": 0.0004997530864197531, "loss": 0.3582, "step": 16200 }, { "epoch": 15.0, "eval_cer": 0.31852986217457885, "eval_loss": 1.055077075958252, "eval_runtime": 46.8181, "eval_samples_per_second": 19.01, "eval_steps_per_second": 2.392, "eval_wer": 0.7623106060606061, "step": 16200 }, { "epoch": 16.0, "grad_norm": 5.1030144691467285, "learning_rate": 0.0004889814814814815, "loss": 0.3607, "step": 17280 }, { "epoch": 16.0, "eval_cer": 0.3101071975497703, "eval_loss": 1.0534826517105103, "eval_runtime": 47.5102, "eval_samples_per_second": 18.733, "eval_steps_per_second": 2.357, "eval_wer": 0.7482638888888888, "step": 17280 }, { "epoch": 17.0, "grad_norm": 0.22218887507915497, "learning_rate": 0.0004778703703703704, "loss": 0.3447, "step": 18360 }, { "epoch": 17.0, "eval_cer": 0.30813120584893544, "eval_loss": 1.064017415046692, "eval_runtime": 48.3671, "eval_samples_per_second": 18.401, "eval_steps_per_second": 2.316, "eval_wer": 0.7369002525252525, "step": 18360 }, { "epoch": 18.0, "grad_norm": 0.14536279439926147, "learning_rate": 0.00046675925925925926, "loss": 0.325, "step": 19440 }, { "epoch": 18.0, "eval_cer": 0.2905448797115052, "eval_loss": 1.0327048301696777, "eval_runtime": 48.9592, "eval_samples_per_second": 18.178, "eval_steps_per_second": 2.288, "eval_wer": 0.7534722222222222, "step": 19440 }, { "epoch": 19.0, "grad_norm": 1.5726815462112427, "learning_rate": 0.00045564814814814817, "loss": 0.3022, "step": 20520 }, { "epoch": 19.0, "eval_cer": 0.2886923874919725, "eval_loss": 0.9869930148124695, "eval_runtime": 49.3541, "eval_samples_per_second": 18.033, "eval_steps_per_second": 2.269, "eval_wer": 0.7231691919191919, "step": 20520 }, { "epoch": 20.0, "grad_norm": 0.41919103264808655, "learning_rate": 0.00044454732510288065, "loss": 0.2825, "step": 21600 }, { "epoch": 20.0, "eval_cer": 0.28056612162228917, "eval_loss": 0.9183225035667419, "eval_runtime": 49.2359, "eval_samples_per_second": 18.076, "eval_steps_per_second": 2.275, "eval_wer": 0.686395202020202, "step": 21600 }, { "epoch": 21.0, "grad_norm": 12.236234664916992, "learning_rate": 0.0004334362139917696, "loss": 0.2706, "step": 22680 }, { "epoch": 21.0, "eval_cer": 0.28604949859210593, "eval_loss": 0.9366316795349121, "eval_runtime": 49.1391, "eval_samples_per_second": 18.112, "eval_steps_per_second": 2.279, "eval_wer": 0.6811868686868687, "step": 22680 }, { "epoch": 22.0, "grad_norm": 4.797195911407471, "learning_rate": 0.0004223353909465021, "loss": 0.2507, "step": 23760 }, { "epoch": 22.0, "eval_cer": 0.2608062046139406, "eval_loss": 0.9585080146789551, "eval_runtime": 48.7093, "eval_samples_per_second": 18.272, "eval_steps_per_second": 2.299, "eval_wer": 0.6941287878787878, "step": 23760 }, { "epoch": 23.0, "grad_norm": 4.625443935394287, "learning_rate": 0.00041122427983539094, "loss": 0.237, "step": 24840 }, { "epoch": 23.0, "eval_cer": 0.28024502297090353, "eval_loss": 1.010016918182373, "eval_runtime": 50.1358, "eval_samples_per_second": 17.752, "eval_steps_per_second": 2.234, "eval_wer": 0.6797664141414141, "step": 24840 }, { "epoch": 24.0, "grad_norm": 0.49481087923049927, "learning_rate": 0.00040011316872427984, "loss": 0.2298, "step": 25920 }, { "epoch": 24.0, "eval_cer": 0.24492417131848046, "eval_loss": 0.9184597730636597, "eval_runtime": 48.7455, "eval_samples_per_second": 18.258, "eval_steps_per_second": 2.298, "eval_wer": 0.6349431818181818, "step": 25920 }, { "epoch": 25.0, "grad_norm": 1.7336276769638062, "learning_rate": 0.0003890123456790123, "loss": 0.221, "step": 27000 }, { "epoch": 25.0, "eval_cer": 0.27846663044015213, "eval_loss": 0.9352790713310242, "eval_runtime": 48.8906, "eval_samples_per_second": 18.204, "eval_steps_per_second": 2.291, "eval_wer": 0.6579861111111112, "step": 27000 }, { "epoch": 26.0, "grad_norm": 0.02212027832865715, "learning_rate": 0.0003779012345679013, "loss": 0.2052, "step": 28080 }, { "epoch": 26.0, "eval_cer": 0.2507039470434224, "eval_loss": 0.8651528358459473, "eval_runtime": 49.0769, "eval_samples_per_second": 18.135, "eval_steps_per_second": 2.282, "eval_wer": 0.6493055555555556, "step": 28080 }, { "epoch": 27.0, "grad_norm": 2.215277910232544, "learning_rate": 0.0003667901234567901, "loss": 0.1928, "step": 29160 }, { "epoch": 27.0, "eval_cer": 0.2630785950699007, "eval_loss": 0.8858852386474609, "eval_runtime": 49.657, "eval_samples_per_second": 17.923, "eval_steps_per_second": 2.255, "eval_wer": 0.6775568181818182, "step": 29160 }, { "epoch": 28.0, "grad_norm": 0.10988181829452515, "learning_rate": 0.000355679012345679, "loss": 0.1889, "step": 30240 }, { "epoch": 28.0, "eval_cer": 0.2666353801314034, "eval_loss": 0.9239539504051208, "eval_runtime": 49.2302, "eval_samples_per_second": 18.078, "eval_steps_per_second": 2.275, "eval_wer": 0.6636679292929293, "step": 30240 }, { "epoch": 29.0, "grad_norm": 0.5829525589942932, "learning_rate": 0.0003445781893004115, "loss": 0.1771, "step": 31320 }, { "epoch": 29.0, "eval_cer": 0.24934545274909845, "eval_loss": 0.9042806625366211, "eval_runtime": 52.6225, "eval_samples_per_second": 16.913, "eval_steps_per_second": 2.128, "eval_wer": 0.6256313131313131, "step": 31320 }, { "epoch": 30.0, "grad_norm": 3.2479238510131836, "learning_rate": 0.00033346707818930046, "loss": 0.163, "step": 32400 }, { "epoch": 30.0, "eval_cer": 0.26213999901200413, "eval_loss": 0.9130964875221252, "eval_runtime": 50.9345, "eval_samples_per_second": 17.473, "eval_steps_per_second": 2.199, "eval_wer": 0.6504103535353535, "step": 32400 }, { "epoch": 31.0, "grad_norm": 2.047846555709839, "learning_rate": 0.0003223559670781893, "loss": 0.1603, "step": 33480 }, { "epoch": 31.0, "eval_cer": 0.24055228968038334, "eval_loss": 0.8102329969406128, "eval_runtime": 50.6115, "eval_samples_per_second": 17.585, "eval_steps_per_second": 2.213, "eval_wer": 0.6319444444444444, "step": 33480 }, { "epoch": 32.0, "grad_norm": 0.3893296420574188, "learning_rate": 0.0003112448559670782, "loss": 0.1447, "step": 34560 }, { "epoch": 32.0, "eval_cer": 0.2447512720446574, "eval_loss": 0.9245155453681946, "eval_runtime": 51.908, "eval_samples_per_second": 17.146, "eval_steps_per_second": 2.158, "eval_wer": 0.6336805555555556, "step": 34560 }, { "epoch": 33.0, "grad_norm": 2.6302273273468018, "learning_rate": 0.0003001440329218107, "loss": 0.1418, "step": 35640 }, { "epoch": 33.0, "eval_cer": 0.25300103739564295, "eval_loss": 0.9590283632278442, "eval_runtime": 52.0031, "eval_samples_per_second": 17.114, "eval_steps_per_second": 2.154, "eval_wer": 0.6235795454545454, "step": 35640 }, { "epoch": 34.0, "grad_norm": 3.61879301071167, "learning_rate": 0.0002890432098765432, "loss": 0.1415, "step": 36720 }, { "epoch": 34.0, "eval_cer": 0.2578916168552092, "eval_loss": 0.92754727602005, "eval_runtime": 52.0318, "eval_samples_per_second": 17.105, "eval_steps_per_second": 2.153, "eval_wer": 0.634469696969697, "step": 36720 }, { "epoch": 35.0, "grad_norm": 6.908621311187744, "learning_rate": 0.00027793209876543213, "loss": 0.1313, "step": 37800 }, { "epoch": 35.0, "eval_cer": 0.24981475077804674, "eval_loss": 0.8644362688064575, "eval_runtime": 53.8225, "eval_samples_per_second": 16.536, "eval_steps_per_second": 2.081, "eval_wer": 0.6279987373737373, "step": 37800 }, { "epoch": 36.0, "grad_norm": 2.5687201023101807, "learning_rate": 0.000266820987654321, "loss": 0.1285, "step": 38880 }, { "epoch": 36.0, "eval_cer": 0.26505458677073557, "eval_loss": 0.9070570468902588, "eval_runtime": 55.322, "eval_samples_per_second": 16.088, "eval_steps_per_second": 2.025, "eval_wer": 0.625, "step": 38880 }, { "epoch": 37.0, "grad_norm": 0.1792680323123932, "learning_rate": 0.0002557098765432099, "loss": 0.1204, "step": 39960 }, { "epoch": 37.0, "eval_cer": 0.2386503976683298, "eval_loss": 0.8658037185668945, "eval_runtime": 54.276, "eval_samples_per_second": 16.398, "eval_steps_per_second": 2.064, "eval_wer": 0.6092171717171717, "step": 39960 }, { "epoch": 38.0, "grad_norm": 0.05945800244808197, "learning_rate": 0.0002445987654320988, "loss": 0.1116, "step": 41040 }, { "epoch": 38.0, "eval_cer": 0.24588746727263747, "eval_loss": 0.8684060573577881, "eval_runtime": 55.9431, "eval_samples_per_second": 15.909, "eval_steps_per_second": 2.002, "eval_wer": 0.6267361111111112, "step": 41040 }, { "epoch": 39.0, "grad_norm": 2.164262056350708, "learning_rate": 0.00023349794238683127, "loss": 0.102, "step": 42120 }, { "epoch": 39.0, "eval_cer": 0.24102158770933163, "eval_loss": 0.9792320728302002, "eval_runtime": 54.7942, "eval_samples_per_second": 16.243, "eval_steps_per_second": 2.044, "eval_wer": 0.6245265151515151, "step": 42120 }, { "epoch": 40.0, "grad_norm": 7.841192722320557, "learning_rate": 0.00022238683127572017, "loss": 0.0966, "step": 43200 }, { "epoch": 40.0, "eval_cer": 0.2466037642641901, "eval_loss": 0.8880752325057983, "eval_runtime": 57.0632, "eval_samples_per_second": 15.597, "eval_steps_per_second": 1.963, "eval_wer": 0.6163194444444444, "step": 43200 }, { "epoch": 41.0, "grad_norm": 0.5480403304100037, "learning_rate": 0.00021128600823045268, "loss": 0.0934, "step": 44280 }, { "epoch": 41.0, "eval_cer": 0.23398211727510745, "eval_loss": 0.8669174909591675, "eval_runtime": 56.5233, "eval_samples_per_second": 15.746, "eval_steps_per_second": 1.981, "eval_wer": 0.5970643939393939, "step": 44280 }, { "epoch": 42.0, "grad_norm": 2.996035099029541, "learning_rate": 0.00020017489711934155, "loss": 0.0847, "step": 45360 }, { "epoch": 42.0, "eval_cer": 0.2370696043076619, "eval_loss": 0.9717867970466614, "eval_runtime": 55.4728, "eval_samples_per_second": 16.044, "eval_steps_per_second": 2.019, "eval_wer": 0.6207386363636364, "step": 45360 }, { "epoch": 43.0, "grad_norm": 0.41690441966056824, "learning_rate": 0.00018907407407407406, "loss": 0.0828, "step": 46440 }, { "epoch": 43.0, "eval_cer": 0.2392925949711011, "eval_loss": 0.957336962223053, "eval_runtime": 54.9772, "eval_samples_per_second": 16.189, "eval_steps_per_second": 2.037, "eval_wer": 0.6223169191919192, "step": 46440 }, { "epoch": 44.0, "grad_norm": 0.07533986121416092, "learning_rate": 0.0001779732510288066, "loss": 0.0727, "step": 47520 }, { "epoch": 44.0, "eval_cer": 0.2357605098058588, "eval_loss": 0.9871988892555237, "eval_runtime": 57.6886, "eval_samples_per_second": 15.428, "eval_steps_per_second": 1.941, "eval_wer": 0.6096906565656566, "step": 47520 }, { "epoch": 45.0, "grad_norm": 0.7598063945770264, "learning_rate": 0.00016686213991769547, "loss": 0.0701, "step": 48600 }, { "epoch": 45.0, "eval_cer": 0.24457837277083436, "eval_loss": 0.9421331882476807, "eval_runtime": 55.063, "eval_samples_per_second": 16.163, "eval_steps_per_second": 2.034, "eval_wer": 0.6115845959595959, "step": 48600 }, { "epoch": 46.0, "grad_norm": 0.43303415179252625, "learning_rate": 0.00015575102880658438, "loss": 0.0648, "step": 49680 }, { "epoch": 46.0, "eval_cer": 0.24672726374549228, "eval_loss": 0.9590614438056946, "eval_runtime": 57.1789, "eval_samples_per_second": 15.565, "eval_steps_per_second": 1.959, "eval_wer": 0.6043244949494949, "step": 49680 }, { "epoch": 47.0, "grad_norm": 6.171388626098633, "learning_rate": 0.00014463991769547325, "loss": 0.0634, "step": 50760 }, { "epoch": 47.0, "eval_cer": 0.23551351084325445, "eval_loss": 0.9990620017051697, "eval_runtime": 55.5622, "eval_samples_per_second": 16.018, "eval_steps_per_second": 2.016, "eval_wer": 0.6109532828282829, "step": 50760 }, { "epoch": 48.0, "grad_norm": 0.05001814663410187, "learning_rate": 0.0001335390946502058, "loss": 0.0573, "step": 51840 }, { "epoch": 48.0, "eval_cer": 0.23452551499283703, "eval_loss": 0.9873119592666626, "eval_runtime": 55.0833, "eval_samples_per_second": 16.157, "eval_steps_per_second": 2.033, "eval_wer": 0.6054292929292929, "step": 51840 }, { "epoch": 49.0, "grad_norm": 3.651003360748291, "learning_rate": 0.00012242798353909466, "loss": 0.0527, "step": 52920 }, { "epoch": 49.0, "eval_cer": 0.23247542360322088, "eval_loss": 0.9885514974594116, "eval_runtime": 52.5162, "eval_samples_per_second": 16.947, "eval_steps_per_second": 2.133, "eval_wer": 0.5935921717171717, "step": 52920 }, { "epoch": 50.0, "grad_norm": 3.5055177211761475, "learning_rate": 0.00011131687242798354, "loss": 0.0506, "step": 54000 }, { "epoch": 50.0, "eval_cer": 0.22867163957911377, "eval_loss": 1.0199133157730103, "eval_runtime": 51.406, "eval_samples_per_second": 17.313, "eval_steps_per_second": 2.179, "eval_wer": 0.5940656565656566, "step": 54000 }, { "epoch": 51.0, "grad_norm": 0.08695941418409348, "learning_rate": 0.00010020576131687243, "loss": 0.0486, "step": 55080 }, { "epoch": 51.0, "eval_cer": 0.22634984933063282, "eval_loss": 1.0691256523132324, "eval_runtime": 54.2523, "eval_samples_per_second": 16.405, "eval_steps_per_second": 2.064, "eval_wer": 0.5880681818181818, "step": 55080 }, { "epoch": 52.0, "grad_norm": 0.4256766438484192, "learning_rate": 8.909465020576133e-05, "loss": 0.0447, "step": 56160 }, { "epoch": 52.0, "eval_cer": 0.22963493553327077, "eval_loss": 1.0140999555587769, "eval_runtime": 58.925, "eval_samples_per_second": 15.104, "eval_steps_per_second": 1.901, "eval_wer": 0.5893308080808081, "step": 56160 }, { "epoch": 53.0, "grad_norm": 3.884925365447998, "learning_rate": 7.799382716049382e-05, "loss": 0.0419, "step": 57240 }, { "epoch": 53.0, "eval_cer": 0.2279306426913007, "eval_loss": 1.0658098459243774, "eval_runtime": 50.8901, "eval_samples_per_second": 17.489, "eval_steps_per_second": 2.201, "eval_wer": 0.5872790404040404, "step": 57240 }, { "epoch": 54.0, "grad_norm": 0.5678676962852478, "learning_rate": 6.690329218106995e-05, "loss": 0.0376, "step": 58320 }, { "epoch": 54.0, "eval_cer": 0.2253618534802154, "eval_loss": 1.144079327583313, "eval_runtime": 52.3564, "eval_samples_per_second": 16.999, "eval_steps_per_second": 2.139, "eval_wer": 0.5888573232323232, "step": 58320 }, { "epoch": 55.0, "grad_norm": 1.0211379528045654, "learning_rate": 5.579218106995885e-05, "loss": 0.0355, "step": 59400 }, { "epoch": 55.0, "eval_cer": 0.22486785555500666, "eval_loss": 1.146174430847168, "eval_runtime": 50.8316, "eval_samples_per_second": 17.509, "eval_steps_per_second": 2.203, "eval_wer": 0.5880681818181818, "step": 59400 }, { "epoch": 56.0, "grad_norm": 0.02778603509068489, "learning_rate": 4.468106995884774e-05, "loss": 0.0335, "step": 60480 }, { "epoch": 56.0, "eval_cer": 0.22442325742231883, "eval_loss": 1.1712491512298584, "eval_runtime": 51.7561, "eval_samples_per_second": 17.196, "eval_steps_per_second": 2.164, "eval_wer": 0.5860164141414141, "step": 60480 }, { "epoch": 57.0, "grad_norm": 0.13397055864334106, "learning_rate": 3.3569958847736626e-05, "loss": 0.0296, "step": 61560 }, { "epoch": 57.0, "eval_cer": 0.22180506841871264, "eval_loss": 1.162169337272644, "eval_runtime": 51.0452, "eval_samples_per_second": 17.436, "eval_steps_per_second": 2.194, "eval_wer": 0.5785984848484849, "step": 61560 }, { "epoch": 58.0, "grad_norm": 0.001944132731296122, "learning_rate": 2.246913580246914e-05, "loss": 0.0301, "step": 62640 }, { "epoch": 58.0, "eval_cer": 0.22350936126068272, "eval_loss": 1.170377492904663, "eval_runtime": 50.364, "eval_samples_per_second": 17.671, "eval_steps_per_second": 2.224, "eval_wer": 0.5839646464646465, "step": 62640 }, { "epoch": 59.0, "grad_norm": 0.18270032107830048, "learning_rate": 1.1358024691358025e-05, "loss": 0.0283, "step": 63720 }, { "epoch": 59.0, "eval_cer": 0.22133577038976437, "eval_loss": 1.1973356008529663, "eval_runtime": 50.9914, "eval_samples_per_second": 17.454, "eval_steps_per_second": 2.196, "eval_wer": 0.5804924242424242, "step": 63720 }, { "epoch": 60.0, "grad_norm": 0.00017149873019661754, "learning_rate": 2.469135802469136e-07, "loss": 0.0245, "step": 64800 }, { "epoch": 60.0, "eval_cer": 0.2198290767178778, "eval_loss": 1.1907662153244019, "eval_runtime": 51.7658, "eval_samples_per_second": 17.193, "eval_steps_per_second": 2.164, "eval_wer": 0.5762310606060606, "step": 64800 }, { "epoch": 60.0, "step": 64800, "total_flos": 1.8440987587856836e+20, "train_loss": 0.1667554270485301, "train_runtime": 81955.137, "train_samples_per_second": 12.641, "train_steps_per_second": 0.791 } ], "logging_steps": 500, "max_steps": 64800, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8440987587856836e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }