outputs / trainer_state.json
Priyanship's picture
large_sami_22k_finetuned_ft_pseudolabels
0d1e472 verified
{
"best_metric": 0.38667929292929293,
"best_model_checkpoint": "/scratch/elec/puhe/p/palp3/sami_ASR/large_model_output/large-sami-22k-finetuned/outputs/checkpoint-1080",
"epoch": 60.0,
"eval_steps": 500,
"global_step": 64800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.5049565434455872,
"learning_rate": 3.32716049382716e-05,
"loss": 0.126,
"step": 1080
},
{
"epoch": 1.0,
"eval_cer": 0.1485204762139999,
"eval_loss": 0.4803544282913208,
"eval_runtime": 50.5555,
"eval_samples_per_second": 17.604,
"eval_steps_per_second": 2.215,
"eval_wer": 0.38667929292929293,
"step": 1080
},
{
"epoch": 2.0,
"grad_norm": 2.3093390464782715,
"learning_rate": 6.660493827160493e-05,
"loss": 0.1441,
"step": 2160
},
{
"epoch": 2.0,
"eval_cer": 0.19503038087240035,
"eval_loss": 0.6097356677055359,
"eval_runtime": 49.9713,
"eval_samples_per_second": 17.81,
"eval_steps_per_second": 2.241,
"eval_wer": 0.44239267676767674,
"step": 2160
},
{
"epoch": 3.0,
"grad_norm": 3.067934513092041,
"learning_rate": 9.99074074074074e-05,
"loss": 0.1675,
"step": 3240
},
{
"epoch": 3.0,
"eval_cer": 0.16756409623079582,
"eval_loss": 0.5237330198287964,
"eval_runtime": 52.1692,
"eval_samples_per_second": 17.06,
"eval_steps_per_second": 2.147,
"eval_wer": 0.444760101010101,
"step": 3240
},
{
"epoch": 4.0,
"grad_norm": 6.190335750579834,
"learning_rate": 0.00013324074074074074,
"loss": 0.1919,
"step": 4320
},
{
"epoch": 4.0,
"eval_cer": 0.188435508570864,
"eval_loss": 0.6256272196769714,
"eval_runtime": 49.8649,
"eval_samples_per_second": 17.848,
"eval_steps_per_second": 2.246,
"eval_wer": 0.484375,
"step": 4320
},
{
"epoch": 5.0,
"grad_norm": 10.29676342010498,
"learning_rate": 0.0001665432098765432,
"loss": 0.2168,
"step": 5400
},
{
"epoch": 5.0,
"eval_cer": 0.1991552635478931,
"eval_loss": 0.6817235946655273,
"eval_runtime": 49.9056,
"eval_samples_per_second": 17.834,
"eval_steps_per_second": 2.244,
"eval_wer": 0.5130997474747475,
"step": 5400
},
{
"epoch": 6.0,
"grad_norm": 16.29789924621582,
"learning_rate": 0.00019987654320987656,
"loss": 0.2411,
"step": 6480
},
{
"epoch": 6.0,
"eval_cer": 0.20411994269624067,
"eval_loss": 0.6815704703330994,
"eval_runtime": 47.2826,
"eval_samples_per_second": 18.823,
"eval_steps_per_second": 2.369,
"eval_wer": 0.5233585858585859,
"step": 6480
},
{
"epoch": 7.0,
"grad_norm": 13.96838665008545,
"learning_rate": 0.000233179012345679,
"loss": 0.2493,
"step": 7560
},
{
"epoch": 7.0,
"eval_cer": 0.2558662253618535,
"eval_loss": 0.8295482993125916,
"eval_runtime": 47.3326,
"eval_samples_per_second": 18.803,
"eval_steps_per_second": 2.366,
"eval_wer": 0.6788194444444444,
"step": 7560
},
{
"epoch": 8.0,
"grad_norm": 12.302577018737793,
"learning_rate": 0.0002665123456790123,
"loss": 0.2718,
"step": 8640
},
{
"epoch": 8.0,
"eval_cer": 0.2669070789902682,
"eval_loss": 0.8849073648452759,
"eval_runtime": 47.1385,
"eval_samples_per_second": 18.881,
"eval_steps_per_second": 2.376,
"eval_wer": 0.6756628787878788,
"step": 8640
},
{
"epoch": 9.0,
"grad_norm": 0.21664512157440186,
"learning_rate": 0.0002998148148148148,
"loss": 0.2922,
"step": 9720
},
{
"epoch": 9.0,
"eval_cer": 0.3401422714024601,
"eval_loss": 1.0527104139328003,
"eval_runtime": 49.2164,
"eval_samples_per_second": 18.083,
"eval_steps_per_second": 2.276,
"eval_wer": 0.6721906565656566,
"step": 9720
},
{
"epoch": 10.0,
"grad_norm": 7.9486260414123535,
"learning_rate": 0.0003331172839506173,
"loss": 0.3156,
"step": 10800
},
{
"epoch": 10.0,
"eval_cer": 0.35758039816232773,
"eval_loss": 1.0661259889602661,
"eval_runtime": 48.5028,
"eval_samples_per_second": 18.349,
"eval_steps_per_second": 2.309,
"eval_wer": 0.7528409090909091,
"step": 10800
},
{
"epoch": 11.0,
"grad_norm": 8.1552095413208,
"learning_rate": 0.0003664506172839506,
"loss": 0.3273,
"step": 11880
},
{
"epoch": 11.0,
"eval_cer": 0.2929654695450279,
"eval_loss": 1.0082694292068481,
"eval_runtime": 48.9797,
"eval_samples_per_second": 18.171,
"eval_steps_per_second": 2.287,
"eval_wer": 0.7840909090909091,
"step": 11880
},
{
"epoch": 12.0,
"grad_norm": 8.20614242553711,
"learning_rate": 0.00039978395061728396,
"loss": 0.3216,
"step": 12960
},
{
"epoch": 12.0,
"eval_cer": 0.3153682754532431,
"eval_loss": 1.130453109741211,
"eval_runtime": 48.376,
"eval_samples_per_second": 18.398,
"eval_steps_per_second": 2.315,
"eval_wer": 0.728219696969697,
"step": 12960
},
{
"epoch": 13.0,
"grad_norm": 14.636846542358398,
"learning_rate": 0.00043311728395061726,
"loss": 0.3498,
"step": 14040
},
{
"epoch": 13.0,
"eval_cer": 0.3106258953712394,
"eval_loss": 1.0758916139602661,
"eval_runtime": 48.0575,
"eval_samples_per_second": 18.519,
"eval_steps_per_second": 2.331,
"eval_wer": 0.7312184343434344,
"step": 14040
},
{
"epoch": 14.0,
"grad_norm": 4.806951999664307,
"learning_rate": 0.0004664506172839506,
"loss": 0.3553,
"step": 15120
},
{
"epoch": 14.0,
"eval_cer": 0.28031912265968484,
"eval_loss": 0.8731944561004639,
"eval_runtime": 47.2505,
"eval_samples_per_second": 18.836,
"eval_steps_per_second": 2.37,
"eval_wer": 0.6756628787878788,
"step": 15120
},
{
"epoch": 15.0,
"grad_norm": 0.5450202822685242,
"learning_rate": 0.0004997530864197531,
"loss": 0.3582,
"step": 16200
},
{
"epoch": 15.0,
"eval_cer": 0.31852986217457885,
"eval_loss": 1.055077075958252,
"eval_runtime": 46.8181,
"eval_samples_per_second": 19.01,
"eval_steps_per_second": 2.392,
"eval_wer": 0.7623106060606061,
"step": 16200
},
{
"epoch": 16.0,
"grad_norm": 5.1030144691467285,
"learning_rate": 0.0004889814814814815,
"loss": 0.3607,
"step": 17280
},
{
"epoch": 16.0,
"eval_cer": 0.3101071975497703,
"eval_loss": 1.0534826517105103,
"eval_runtime": 47.5102,
"eval_samples_per_second": 18.733,
"eval_steps_per_second": 2.357,
"eval_wer": 0.7482638888888888,
"step": 17280
},
{
"epoch": 17.0,
"grad_norm": 0.22218887507915497,
"learning_rate": 0.0004778703703703704,
"loss": 0.3447,
"step": 18360
},
{
"epoch": 17.0,
"eval_cer": 0.30813120584893544,
"eval_loss": 1.064017415046692,
"eval_runtime": 48.3671,
"eval_samples_per_second": 18.401,
"eval_steps_per_second": 2.316,
"eval_wer": 0.7369002525252525,
"step": 18360
},
{
"epoch": 18.0,
"grad_norm": 0.14536279439926147,
"learning_rate": 0.00046675925925925926,
"loss": 0.325,
"step": 19440
},
{
"epoch": 18.0,
"eval_cer": 0.2905448797115052,
"eval_loss": 1.0327048301696777,
"eval_runtime": 48.9592,
"eval_samples_per_second": 18.178,
"eval_steps_per_second": 2.288,
"eval_wer": 0.7534722222222222,
"step": 19440
},
{
"epoch": 19.0,
"grad_norm": 1.5726815462112427,
"learning_rate": 0.00045564814814814817,
"loss": 0.3022,
"step": 20520
},
{
"epoch": 19.0,
"eval_cer": 0.2886923874919725,
"eval_loss": 0.9869930148124695,
"eval_runtime": 49.3541,
"eval_samples_per_second": 18.033,
"eval_steps_per_second": 2.269,
"eval_wer": 0.7231691919191919,
"step": 20520
},
{
"epoch": 20.0,
"grad_norm": 0.41919103264808655,
"learning_rate": 0.00044454732510288065,
"loss": 0.2825,
"step": 21600
},
{
"epoch": 20.0,
"eval_cer": 0.28056612162228917,
"eval_loss": 0.9183225035667419,
"eval_runtime": 49.2359,
"eval_samples_per_second": 18.076,
"eval_steps_per_second": 2.275,
"eval_wer": 0.686395202020202,
"step": 21600
},
{
"epoch": 21.0,
"grad_norm": 12.236234664916992,
"learning_rate": 0.0004334362139917696,
"loss": 0.2706,
"step": 22680
},
{
"epoch": 21.0,
"eval_cer": 0.28604949859210593,
"eval_loss": 0.9366316795349121,
"eval_runtime": 49.1391,
"eval_samples_per_second": 18.112,
"eval_steps_per_second": 2.279,
"eval_wer": 0.6811868686868687,
"step": 22680
},
{
"epoch": 22.0,
"grad_norm": 4.797195911407471,
"learning_rate": 0.0004223353909465021,
"loss": 0.2507,
"step": 23760
},
{
"epoch": 22.0,
"eval_cer": 0.2608062046139406,
"eval_loss": 0.9585080146789551,
"eval_runtime": 48.7093,
"eval_samples_per_second": 18.272,
"eval_steps_per_second": 2.299,
"eval_wer": 0.6941287878787878,
"step": 23760
},
{
"epoch": 23.0,
"grad_norm": 4.625443935394287,
"learning_rate": 0.00041122427983539094,
"loss": 0.237,
"step": 24840
},
{
"epoch": 23.0,
"eval_cer": 0.28024502297090353,
"eval_loss": 1.010016918182373,
"eval_runtime": 50.1358,
"eval_samples_per_second": 17.752,
"eval_steps_per_second": 2.234,
"eval_wer": 0.6797664141414141,
"step": 24840
},
{
"epoch": 24.0,
"grad_norm": 0.49481087923049927,
"learning_rate": 0.00040011316872427984,
"loss": 0.2298,
"step": 25920
},
{
"epoch": 24.0,
"eval_cer": 0.24492417131848046,
"eval_loss": 0.9184597730636597,
"eval_runtime": 48.7455,
"eval_samples_per_second": 18.258,
"eval_steps_per_second": 2.298,
"eval_wer": 0.6349431818181818,
"step": 25920
},
{
"epoch": 25.0,
"grad_norm": 1.7336276769638062,
"learning_rate": 0.0003890123456790123,
"loss": 0.221,
"step": 27000
},
{
"epoch": 25.0,
"eval_cer": 0.27846663044015213,
"eval_loss": 0.9352790713310242,
"eval_runtime": 48.8906,
"eval_samples_per_second": 18.204,
"eval_steps_per_second": 2.291,
"eval_wer": 0.6579861111111112,
"step": 27000
},
{
"epoch": 26.0,
"grad_norm": 0.02212027832865715,
"learning_rate": 0.0003779012345679013,
"loss": 0.2052,
"step": 28080
},
{
"epoch": 26.0,
"eval_cer": 0.2507039470434224,
"eval_loss": 0.8651528358459473,
"eval_runtime": 49.0769,
"eval_samples_per_second": 18.135,
"eval_steps_per_second": 2.282,
"eval_wer": 0.6493055555555556,
"step": 28080
},
{
"epoch": 27.0,
"grad_norm": 2.215277910232544,
"learning_rate": 0.0003667901234567901,
"loss": 0.1928,
"step": 29160
},
{
"epoch": 27.0,
"eval_cer": 0.2630785950699007,
"eval_loss": 0.8858852386474609,
"eval_runtime": 49.657,
"eval_samples_per_second": 17.923,
"eval_steps_per_second": 2.255,
"eval_wer": 0.6775568181818182,
"step": 29160
},
{
"epoch": 28.0,
"grad_norm": 0.10988181829452515,
"learning_rate": 0.000355679012345679,
"loss": 0.1889,
"step": 30240
},
{
"epoch": 28.0,
"eval_cer": 0.2666353801314034,
"eval_loss": 0.9239539504051208,
"eval_runtime": 49.2302,
"eval_samples_per_second": 18.078,
"eval_steps_per_second": 2.275,
"eval_wer": 0.6636679292929293,
"step": 30240
},
{
"epoch": 29.0,
"grad_norm": 0.5829525589942932,
"learning_rate": 0.0003445781893004115,
"loss": 0.1771,
"step": 31320
},
{
"epoch": 29.0,
"eval_cer": 0.24934545274909845,
"eval_loss": 0.9042806625366211,
"eval_runtime": 52.6225,
"eval_samples_per_second": 16.913,
"eval_steps_per_second": 2.128,
"eval_wer": 0.6256313131313131,
"step": 31320
},
{
"epoch": 30.0,
"grad_norm": 3.2479238510131836,
"learning_rate": 0.00033346707818930046,
"loss": 0.163,
"step": 32400
},
{
"epoch": 30.0,
"eval_cer": 0.26213999901200413,
"eval_loss": 0.9130964875221252,
"eval_runtime": 50.9345,
"eval_samples_per_second": 17.473,
"eval_steps_per_second": 2.199,
"eval_wer": 0.6504103535353535,
"step": 32400
},
{
"epoch": 31.0,
"grad_norm": 2.047846555709839,
"learning_rate": 0.0003223559670781893,
"loss": 0.1603,
"step": 33480
},
{
"epoch": 31.0,
"eval_cer": 0.24055228968038334,
"eval_loss": 0.8102329969406128,
"eval_runtime": 50.6115,
"eval_samples_per_second": 17.585,
"eval_steps_per_second": 2.213,
"eval_wer": 0.6319444444444444,
"step": 33480
},
{
"epoch": 32.0,
"grad_norm": 0.3893296420574188,
"learning_rate": 0.0003112448559670782,
"loss": 0.1447,
"step": 34560
},
{
"epoch": 32.0,
"eval_cer": 0.2447512720446574,
"eval_loss": 0.9245155453681946,
"eval_runtime": 51.908,
"eval_samples_per_second": 17.146,
"eval_steps_per_second": 2.158,
"eval_wer": 0.6336805555555556,
"step": 34560
},
{
"epoch": 33.0,
"grad_norm": 2.6302273273468018,
"learning_rate": 0.0003001440329218107,
"loss": 0.1418,
"step": 35640
},
{
"epoch": 33.0,
"eval_cer": 0.25300103739564295,
"eval_loss": 0.9590283632278442,
"eval_runtime": 52.0031,
"eval_samples_per_second": 17.114,
"eval_steps_per_second": 2.154,
"eval_wer": 0.6235795454545454,
"step": 35640
},
{
"epoch": 34.0,
"grad_norm": 3.61879301071167,
"learning_rate": 0.0002890432098765432,
"loss": 0.1415,
"step": 36720
},
{
"epoch": 34.0,
"eval_cer": 0.2578916168552092,
"eval_loss": 0.92754727602005,
"eval_runtime": 52.0318,
"eval_samples_per_second": 17.105,
"eval_steps_per_second": 2.153,
"eval_wer": 0.634469696969697,
"step": 36720
},
{
"epoch": 35.0,
"grad_norm": 6.908621311187744,
"learning_rate": 0.00027793209876543213,
"loss": 0.1313,
"step": 37800
},
{
"epoch": 35.0,
"eval_cer": 0.24981475077804674,
"eval_loss": 0.8644362688064575,
"eval_runtime": 53.8225,
"eval_samples_per_second": 16.536,
"eval_steps_per_second": 2.081,
"eval_wer": 0.6279987373737373,
"step": 37800
},
{
"epoch": 36.0,
"grad_norm": 2.5687201023101807,
"learning_rate": 0.000266820987654321,
"loss": 0.1285,
"step": 38880
},
{
"epoch": 36.0,
"eval_cer": 0.26505458677073557,
"eval_loss": 0.9070570468902588,
"eval_runtime": 55.322,
"eval_samples_per_second": 16.088,
"eval_steps_per_second": 2.025,
"eval_wer": 0.625,
"step": 38880
},
{
"epoch": 37.0,
"grad_norm": 0.1792680323123932,
"learning_rate": 0.0002557098765432099,
"loss": 0.1204,
"step": 39960
},
{
"epoch": 37.0,
"eval_cer": 0.2386503976683298,
"eval_loss": 0.8658037185668945,
"eval_runtime": 54.276,
"eval_samples_per_second": 16.398,
"eval_steps_per_second": 2.064,
"eval_wer": 0.6092171717171717,
"step": 39960
},
{
"epoch": 38.0,
"grad_norm": 0.05945800244808197,
"learning_rate": 0.0002445987654320988,
"loss": 0.1116,
"step": 41040
},
{
"epoch": 38.0,
"eval_cer": 0.24588746727263747,
"eval_loss": 0.8684060573577881,
"eval_runtime": 55.9431,
"eval_samples_per_second": 15.909,
"eval_steps_per_second": 2.002,
"eval_wer": 0.6267361111111112,
"step": 41040
},
{
"epoch": 39.0,
"grad_norm": 2.164262056350708,
"learning_rate": 0.00023349794238683127,
"loss": 0.102,
"step": 42120
},
{
"epoch": 39.0,
"eval_cer": 0.24102158770933163,
"eval_loss": 0.9792320728302002,
"eval_runtime": 54.7942,
"eval_samples_per_second": 16.243,
"eval_steps_per_second": 2.044,
"eval_wer": 0.6245265151515151,
"step": 42120
},
{
"epoch": 40.0,
"grad_norm": 7.841192722320557,
"learning_rate": 0.00022238683127572017,
"loss": 0.0966,
"step": 43200
},
{
"epoch": 40.0,
"eval_cer": 0.2466037642641901,
"eval_loss": 0.8880752325057983,
"eval_runtime": 57.0632,
"eval_samples_per_second": 15.597,
"eval_steps_per_second": 1.963,
"eval_wer": 0.6163194444444444,
"step": 43200
},
{
"epoch": 41.0,
"grad_norm": 0.5480403304100037,
"learning_rate": 0.00021128600823045268,
"loss": 0.0934,
"step": 44280
},
{
"epoch": 41.0,
"eval_cer": 0.23398211727510745,
"eval_loss": 0.8669174909591675,
"eval_runtime": 56.5233,
"eval_samples_per_second": 15.746,
"eval_steps_per_second": 1.981,
"eval_wer": 0.5970643939393939,
"step": 44280
},
{
"epoch": 42.0,
"grad_norm": 2.996035099029541,
"learning_rate": 0.00020017489711934155,
"loss": 0.0847,
"step": 45360
},
{
"epoch": 42.0,
"eval_cer": 0.2370696043076619,
"eval_loss": 0.9717867970466614,
"eval_runtime": 55.4728,
"eval_samples_per_second": 16.044,
"eval_steps_per_second": 2.019,
"eval_wer": 0.6207386363636364,
"step": 45360
},
{
"epoch": 43.0,
"grad_norm": 0.41690441966056824,
"learning_rate": 0.00018907407407407406,
"loss": 0.0828,
"step": 46440
},
{
"epoch": 43.0,
"eval_cer": 0.2392925949711011,
"eval_loss": 0.957336962223053,
"eval_runtime": 54.9772,
"eval_samples_per_second": 16.189,
"eval_steps_per_second": 2.037,
"eval_wer": 0.6223169191919192,
"step": 46440
},
{
"epoch": 44.0,
"grad_norm": 0.07533986121416092,
"learning_rate": 0.0001779732510288066,
"loss": 0.0727,
"step": 47520
},
{
"epoch": 44.0,
"eval_cer": 0.2357605098058588,
"eval_loss": 0.9871988892555237,
"eval_runtime": 57.6886,
"eval_samples_per_second": 15.428,
"eval_steps_per_second": 1.941,
"eval_wer": 0.6096906565656566,
"step": 47520
},
{
"epoch": 45.0,
"grad_norm": 0.7598063945770264,
"learning_rate": 0.00016686213991769547,
"loss": 0.0701,
"step": 48600
},
{
"epoch": 45.0,
"eval_cer": 0.24457837277083436,
"eval_loss": 0.9421331882476807,
"eval_runtime": 55.063,
"eval_samples_per_second": 16.163,
"eval_steps_per_second": 2.034,
"eval_wer": 0.6115845959595959,
"step": 48600
},
{
"epoch": 46.0,
"grad_norm": 0.43303415179252625,
"learning_rate": 0.00015575102880658438,
"loss": 0.0648,
"step": 49680
},
{
"epoch": 46.0,
"eval_cer": 0.24672726374549228,
"eval_loss": 0.9590614438056946,
"eval_runtime": 57.1789,
"eval_samples_per_second": 15.565,
"eval_steps_per_second": 1.959,
"eval_wer": 0.6043244949494949,
"step": 49680
},
{
"epoch": 47.0,
"grad_norm": 6.171388626098633,
"learning_rate": 0.00014463991769547325,
"loss": 0.0634,
"step": 50760
},
{
"epoch": 47.0,
"eval_cer": 0.23551351084325445,
"eval_loss": 0.9990620017051697,
"eval_runtime": 55.5622,
"eval_samples_per_second": 16.018,
"eval_steps_per_second": 2.016,
"eval_wer": 0.6109532828282829,
"step": 50760
},
{
"epoch": 48.0,
"grad_norm": 0.05001814663410187,
"learning_rate": 0.0001335390946502058,
"loss": 0.0573,
"step": 51840
},
{
"epoch": 48.0,
"eval_cer": 0.23452551499283703,
"eval_loss": 0.9873119592666626,
"eval_runtime": 55.0833,
"eval_samples_per_second": 16.157,
"eval_steps_per_second": 2.033,
"eval_wer": 0.6054292929292929,
"step": 51840
},
{
"epoch": 49.0,
"grad_norm": 3.651003360748291,
"learning_rate": 0.00012242798353909466,
"loss": 0.0527,
"step": 52920
},
{
"epoch": 49.0,
"eval_cer": 0.23247542360322088,
"eval_loss": 0.9885514974594116,
"eval_runtime": 52.5162,
"eval_samples_per_second": 16.947,
"eval_steps_per_second": 2.133,
"eval_wer": 0.5935921717171717,
"step": 52920
},
{
"epoch": 50.0,
"grad_norm": 3.5055177211761475,
"learning_rate": 0.00011131687242798354,
"loss": 0.0506,
"step": 54000
},
{
"epoch": 50.0,
"eval_cer": 0.22867163957911377,
"eval_loss": 1.0199133157730103,
"eval_runtime": 51.406,
"eval_samples_per_second": 17.313,
"eval_steps_per_second": 2.179,
"eval_wer": 0.5940656565656566,
"step": 54000
},
{
"epoch": 51.0,
"grad_norm": 0.08695941418409348,
"learning_rate": 0.00010020576131687243,
"loss": 0.0486,
"step": 55080
},
{
"epoch": 51.0,
"eval_cer": 0.22634984933063282,
"eval_loss": 1.0691256523132324,
"eval_runtime": 54.2523,
"eval_samples_per_second": 16.405,
"eval_steps_per_second": 2.064,
"eval_wer": 0.5880681818181818,
"step": 55080
},
{
"epoch": 52.0,
"grad_norm": 0.4256766438484192,
"learning_rate": 8.909465020576133e-05,
"loss": 0.0447,
"step": 56160
},
{
"epoch": 52.0,
"eval_cer": 0.22963493553327077,
"eval_loss": 1.0140999555587769,
"eval_runtime": 58.925,
"eval_samples_per_second": 15.104,
"eval_steps_per_second": 1.901,
"eval_wer": 0.5893308080808081,
"step": 56160
},
{
"epoch": 53.0,
"grad_norm": 3.884925365447998,
"learning_rate": 7.799382716049382e-05,
"loss": 0.0419,
"step": 57240
},
{
"epoch": 53.0,
"eval_cer": 0.2279306426913007,
"eval_loss": 1.0658098459243774,
"eval_runtime": 50.8901,
"eval_samples_per_second": 17.489,
"eval_steps_per_second": 2.201,
"eval_wer": 0.5872790404040404,
"step": 57240
},
{
"epoch": 54.0,
"grad_norm": 0.5678676962852478,
"learning_rate": 6.690329218106995e-05,
"loss": 0.0376,
"step": 58320
},
{
"epoch": 54.0,
"eval_cer": 0.2253618534802154,
"eval_loss": 1.144079327583313,
"eval_runtime": 52.3564,
"eval_samples_per_second": 16.999,
"eval_steps_per_second": 2.139,
"eval_wer": 0.5888573232323232,
"step": 58320
},
{
"epoch": 55.0,
"grad_norm": 1.0211379528045654,
"learning_rate": 5.579218106995885e-05,
"loss": 0.0355,
"step": 59400
},
{
"epoch": 55.0,
"eval_cer": 0.22486785555500666,
"eval_loss": 1.146174430847168,
"eval_runtime": 50.8316,
"eval_samples_per_second": 17.509,
"eval_steps_per_second": 2.203,
"eval_wer": 0.5880681818181818,
"step": 59400
},
{
"epoch": 56.0,
"grad_norm": 0.02778603509068489,
"learning_rate": 4.468106995884774e-05,
"loss": 0.0335,
"step": 60480
},
{
"epoch": 56.0,
"eval_cer": 0.22442325742231883,
"eval_loss": 1.1712491512298584,
"eval_runtime": 51.7561,
"eval_samples_per_second": 17.196,
"eval_steps_per_second": 2.164,
"eval_wer": 0.5860164141414141,
"step": 60480
},
{
"epoch": 57.0,
"grad_norm": 0.13397055864334106,
"learning_rate": 3.3569958847736626e-05,
"loss": 0.0296,
"step": 61560
},
{
"epoch": 57.0,
"eval_cer": 0.22180506841871264,
"eval_loss": 1.162169337272644,
"eval_runtime": 51.0452,
"eval_samples_per_second": 17.436,
"eval_steps_per_second": 2.194,
"eval_wer": 0.5785984848484849,
"step": 61560
},
{
"epoch": 58.0,
"grad_norm": 0.001944132731296122,
"learning_rate": 2.246913580246914e-05,
"loss": 0.0301,
"step": 62640
},
{
"epoch": 58.0,
"eval_cer": 0.22350936126068272,
"eval_loss": 1.170377492904663,
"eval_runtime": 50.364,
"eval_samples_per_second": 17.671,
"eval_steps_per_second": 2.224,
"eval_wer": 0.5839646464646465,
"step": 62640
},
{
"epoch": 59.0,
"grad_norm": 0.18270032107830048,
"learning_rate": 1.1358024691358025e-05,
"loss": 0.0283,
"step": 63720
},
{
"epoch": 59.0,
"eval_cer": 0.22133577038976437,
"eval_loss": 1.1973356008529663,
"eval_runtime": 50.9914,
"eval_samples_per_second": 17.454,
"eval_steps_per_second": 2.196,
"eval_wer": 0.5804924242424242,
"step": 63720
},
{
"epoch": 60.0,
"grad_norm": 0.00017149873019661754,
"learning_rate": 2.469135802469136e-07,
"loss": 0.0245,
"step": 64800
},
{
"epoch": 60.0,
"eval_cer": 0.2198290767178778,
"eval_loss": 1.1907662153244019,
"eval_runtime": 51.7658,
"eval_samples_per_second": 17.193,
"eval_steps_per_second": 2.164,
"eval_wer": 0.5762310606060606,
"step": 64800
},
{
"epoch": 60.0,
"step": 64800,
"total_flos": 1.8440987587856836e+20,
"train_loss": 0.1667554270485301,
"train_runtime": 81955.137,
"train_samples_per_second": 12.641,
"train_steps_per_second": 0.791
}
],
"logging_steps": 500,
"max_steps": 64800,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.8440987587856836e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}