mana-tts / homo-t5 /trainer_state.json
abreza's picture
add ge2pe
eb57aa1
{
"best_metric": 0.02429259568452835,
"best_model_checkpoint": "./phase3-t5/checkpoint-484000",
"epoch": 50.0,
"eval_steps": 1000,
"global_step": 487100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10264832683227264,
"grad_norm": 0.2675953805446625,
"learning_rate": 0.0005,
"loss": 0.1517,
"step": 1000
},
{
"epoch": 0.10264832683227264,
"eval_cer": 0.3379739336492891,
"eval_loss": 0.09135649353265762,
"eval_runtime": 61.2416,
"eval_samples_per_second": 1.47,
"eval_steps_per_second": 0.016,
"eval_wer": 0.42716711349419123,
"step": 1000
},
{
"epoch": 0.20529665366454528,
"grad_norm": 0.21362937986850739,
"learning_rate": 0.0004989714050606871,
"loss": 0.1197,
"step": 2000
},
{
"epoch": 0.20529665366454528,
"eval_cer": 0.15506516587677724,
"eval_loss": 0.08493143320083618,
"eval_runtime": 60.3144,
"eval_samples_per_second": 1.492,
"eval_steps_per_second": 0.017,
"eval_wer": 0.2645218945487042,
"step": 2000
},
{
"epoch": 0.3079449804968179,
"grad_norm": 0.381354957818985,
"learning_rate": 0.0004979428101213742,
"loss": 0.1119,
"step": 3000
},
{
"epoch": 0.3079449804968179,
"eval_cer": 0.13122037914691942,
"eval_loss": 0.07921701669692993,
"eval_runtime": 41.6322,
"eval_samples_per_second": 2.162,
"eval_steps_per_second": 0.024,
"eval_wer": 0.23145665773011617,
"step": 3000
},
{
"epoch": 0.41059330732909055,
"grad_norm": 0.21338549256324768,
"learning_rate": 0.0004969142151820613,
"loss": 0.1083,
"step": 4000
},
{
"epoch": 0.41059330732909055,
"eval_cer": 0.05139218009478673,
"eval_loss": 0.07416867464780807,
"eval_runtime": 24.7028,
"eval_samples_per_second": 3.643,
"eval_steps_per_second": 0.04,
"eval_wer": 0.14655942806076855,
"step": 4000
},
{
"epoch": 0.5132416341613631,
"grad_norm": 0.16844697296619415,
"learning_rate": 0.0004958856202427484,
"loss": 0.1054,
"step": 5000
},
{
"epoch": 0.5132416341613631,
"eval_cer": 0.06916469194312796,
"eval_loss": 0.0707884430885315,
"eval_runtime": 32.9432,
"eval_samples_per_second": 2.732,
"eval_steps_per_second": 0.03,
"eval_wer": 0.1572832886505809,
"step": 5000
},
{
"epoch": 0.6158899609936358,
"grad_norm": 0.2411990612745285,
"learning_rate": 0.0004948570253034355,
"loss": 0.1033,
"step": 6000
},
{
"epoch": 0.6158899609936358,
"eval_cer": 0.1552132701421801,
"eval_loss": 0.07135774940252304,
"eval_runtime": 46.0141,
"eval_samples_per_second": 1.956,
"eval_steps_per_second": 0.022,
"eval_wer": 0.24128686327077747,
"step": 6000
},
{
"epoch": 0.7185382878259085,
"grad_norm": 0.24947325885295868,
"learning_rate": 0.0004938284303641226,
"loss": 0.1017,
"step": 7000
},
{
"epoch": 0.7185382878259085,
"eval_cer": 0.059537914691943125,
"eval_loss": 0.06659836322069168,
"eval_runtime": 45.8436,
"eval_samples_per_second": 1.963,
"eval_steps_per_second": 0.022,
"eval_wer": 0.14298480786416443,
"step": 7000
},
{
"epoch": 0.8211866146581811,
"grad_norm": 0.14766907691955566,
"learning_rate": 0.0004927998354248098,
"loss": 0.1006,
"step": 8000
},
{
"epoch": 0.8211866146581811,
"eval_cer": 0.0727191943127962,
"eval_loss": 0.06510724872350693,
"eval_runtime": 51.2808,
"eval_samples_per_second": 1.755,
"eval_steps_per_second": 0.02,
"eval_wer": 0.15638963360142985,
"step": 8000
},
{
"epoch": 0.9238349414904538,
"grad_norm": 0.25097745656967163,
"learning_rate": 0.0004917712404854969,
"loss": 0.0982,
"step": 9000
},
{
"epoch": 0.9238349414904538,
"eval_cer": 0.04576421800947867,
"eval_loss": 0.06489837914705276,
"eval_runtime": 45.8464,
"eval_samples_per_second": 1.963,
"eval_steps_per_second": 0.022,
"eval_wer": 0.13047363717605004,
"step": 9000
},
{
"epoch": 1.0264832683227263,
"grad_norm": 0.31185394525527954,
"learning_rate": 0.000490742645546184,
"loss": 0.0976,
"step": 10000
},
{
"epoch": 1.0264832683227263,
"eval_cer": 0.050355450236966824,
"eval_loss": 0.06680955737829208,
"eval_runtime": 46.015,
"eval_samples_per_second": 1.956,
"eval_steps_per_second": 0.022,
"eval_wer": 0.13494191242180517,
"step": 10000
},
{
"epoch": 1.129131595154999,
"grad_norm": 0.2225043922662735,
"learning_rate": 0.000489714050606871,
"loss": 0.0944,
"step": 11000
},
{
"epoch": 1.129131595154999,
"eval_cer": 0.03806279620853081,
"eval_loss": 0.06565282493829727,
"eval_runtime": 39.9826,
"eval_samples_per_second": 2.251,
"eval_steps_per_second": 0.025,
"eval_wer": 0.1224307417336908,
"step": 11000
},
{
"epoch": 1.2317799219872716,
"grad_norm": 0.2643093168735504,
"learning_rate": 0.0004886854556675581,
"loss": 0.095,
"step": 12000
},
{
"epoch": 1.2317799219872716,
"eval_cer": 0.042950236966824644,
"eval_loss": 0.06407604366540909,
"eval_runtime": 38.3229,
"eval_samples_per_second": 2.348,
"eval_steps_per_second": 0.026,
"eval_wer": 0.12779267202859695,
"step": 12000
},
{
"epoch": 1.3344282488195442,
"grad_norm": 0.1704595983028412,
"learning_rate": 0.00048765686072824524,
"loss": 0.0934,
"step": 13000
},
{
"epoch": 1.3344282488195442,
"eval_cer": 0.030953791469194313,
"eval_loss": 0.06052744388580322,
"eval_runtime": 43.0244,
"eval_samples_per_second": 2.092,
"eval_steps_per_second": 0.023,
"eval_wer": 0.11081322609472744,
"step": 13000
},
{
"epoch": 1.437076575651817,
"grad_norm": 0.28452351689338684,
"learning_rate": 0.00048662826578893233,
"loss": 0.0922,
"step": 14000
},
{
"epoch": 1.437076575651817,
"eval_cer": 0.030065165876777253,
"eval_loss": 0.06290669739246368,
"eval_runtime": 38.566,
"eval_samples_per_second": 2.334,
"eval_steps_per_second": 0.026,
"eval_wer": 0.1063449508489723,
"step": 14000
},
{
"epoch": 1.5397249024840896,
"grad_norm": 0.18739064037799835,
"learning_rate": 0.0004855996708496194,
"loss": 0.0914,
"step": 15000
},
{
"epoch": 1.5397249024840896,
"eval_cer": 0.09389810426540285,
"eval_loss": 0.060126081109046936,
"eval_runtime": 60.9362,
"eval_samples_per_second": 1.477,
"eval_steps_per_second": 0.016,
"eval_wer": 0.1742627345844504,
"step": 15000
},
{
"epoch": 1.642373229316362,
"grad_norm": 0.14926160871982574,
"learning_rate": 0.0004845710759103065,
"loss": 0.0923,
"step": 16000
},
{
"epoch": 1.642373229316362,
"eval_cer": 0.04428317535545024,
"eval_loss": 0.05910157784819603,
"eval_runtime": 35.0608,
"eval_samples_per_second": 2.567,
"eval_steps_per_second": 0.029,
"eval_wer": 0.11974977658623771,
"step": 16000
},
{
"epoch": 1.7450215561486346,
"grad_norm": 0.15932171046733856,
"learning_rate": 0.0004835424809709936,
"loss": 0.0923,
"step": 17000
},
{
"epoch": 1.7450215561486346,
"eval_cer": 0.08975118483412323,
"eval_loss": 0.05897140130400658,
"eval_runtime": 37.0041,
"eval_samples_per_second": 2.432,
"eval_steps_per_second": 0.027,
"eval_wer": 0.16979445933869527,
"step": 17000
},
{
"epoch": 1.8476698829809073,
"grad_norm": 0.23555859923362732,
"learning_rate": 0.0004825138860316807,
"loss": 0.0901,
"step": 18000
},
{
"epoch": 1.8476698829809073,
"eval_cer": 0.033619668246445494,
"eval_loss": 0.057106491178274155,
"eval_runtime": 20.6425,
"eval_samples_per_second": 4.36,
"eval_steps_per_second": 0.048,
"eval_wer": 0.10723860589812333,
"step": 18000
},
{
"epoch": 1.95031820981318,
"grad_norm": 0.16271623969078064,
"learning_rate": 0.00048148529109236785,
"loss": 0.0896,
"step": 19000
},
{
"epoch": 1.95031820981318,
"eval_cer": 0.039543838862559244,
"eval_loss": 0.05746171995997429,
"eval_runtime": 21.3164,
"eval_samples_per_second": 4.222,
"eval_steps_per_second": 0.047,
"eval_wer": 0.11349419124218052,
"step": 19000
},
{
"epoch": 2.0529665366454526,
"grad_norm": 0.225450336933136,
"learning_rate": 0.00048045669615305494,
"loss": 0.0879,
"step": 20000
},
{
"epoch": 2.0529665366454526,
"eval_cer": 0.041321090047393365,
"eval_loss": 0.05702373385429382,
"eval_runtime": 22.0309,
"eval_samples_per_second": 4.085,
"eval_steps_per_second": 0.045,
"eval_wer": 0.11170688114387846,
"step": 20000
},
{
"epoch": 2.1556148634777252,
"grad_norm": 0.2128904014825821,
"learning_rate": 0.00047942810121374204,
"loss": 0.0875,
"step": 21000
},
{
"epoch": 2.1556148634777252,
"eval_cer": 0.03154620853080569,
"eval_loss": 0.05841705948114395,
"eval_runtime": 22.0788,
"eval_samples_per_second": 4.076,
"eval_steps_per_second": 0.045,
"eval_wer": 0.10187667560321716,
"step": 21000
},
{
"epoch": 2.258263190309998,
"grad_norm": 0.24170714616775513,
"learning_rate": 0.00047839950627442913,
"loss": 0.0869,
"step": 22000
},
{
"epoch": 2.258263190309998,
"eval_cer": 0.034212085308056875,
"eval_loss": 0.057785358279943466,
"eval_runtime": 22.1563,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.10902591599642537,
"step": 22000
},
{
"epoch": 2.3609115171422705,
"grad_norm": 0.261454313993454,
"learning_rate": 0.0004773709113351162,
"loss": 0.0868,
"step": 23000
},
{
"epoch": 2.3609115171422705,
"eval_cer": 0.03450829383886256,
"eval_loss": 0.05649031326174736,
"eval_runtime": 22.1442,
"eval_samples_per_second": 4.064,
"eval_steps_per_second": 0.045,
"eval_wer": 0.10723860589812333,
"step": 23000
},
{
"epoch": 2.463559843974543,
"grad_norm": 0.24116463959217072,
"learning_rate": 0.0004763423163958033,
"loss": 0.0861,
"step": 24000
},
{
"epoch": 2.463559843974543,
"eval_cer": 0.03169431279620853,
"eval_loss": 0.05647359788417816,
"eval_runtime": 23.1195,
"eval_samples_per_second": 3.893,
"eval_steps_per_second": 0.043,
"eval_wer": 0.10187667560321716,
"step": 24000
},
{
"epoch": 2.566208170806816,
"grad_norm": 0.19247783720493317,
"learning_rate": 0.0004753137214564904,
"loss": 0.0861,
"step": 25000
},
{
"epoch": 2.566208170806816,
"eval_cer": 0.027103080568720378,
"eval_loss": 0.05473396182060242,
"eval_runtime": 21.8207,
"eval_samples_per_second": 4.125,
"eval_steps_per_second": 0.046,
"eval_wer": 0.10008936550491511,
"step": 25000
},
{
"epoch": 2.6688564976390885,
"grad_norm": 0.20277945697307587,
"learning_rate": 0.00047428512651717756,
"loss": 0.0859,
"step": 26000
},
{
"epoch": 2.6688564976390885,
"eval_cer": 0.028880331753554502,
"eval_loss": 0.05520312860608101,
"eval_runtime": 22.4109,
"eval_samples_per_second": 4.016,
"eval_steps_per_second": 0.045,
"eval_wer": 0.10187667560321716,
"step": 26000
},
{
"epoch": 2.771504824471361,
"grad_norm": 0.17546099424362183,
"learning_rate": 0.00047325653157786465,
"loss": 0.0853,
"step": 27000
},
{
"epoch": 2.771504824471361,
"eval_cer": 0.027251184834123223,
"eval_loss": 0.05452750623226166,
"eval_runtime": 22.3468,
"eval_samples_per_second": 4.027,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09562109025915996,
"step": 27000
},
{
"epoch": 2.874153151303634,
"grad_norm": 0.23071998357772827,
"learning_rate": 0.00047222793663855174,
"loss": 0.0857,
"step": 28000
},
{
"epoch": 2.874153151303634,
"eval_cer": 0.027399289099526065,
"eval_loss": 0.05141612887382507,
"eval_runtime": 22.353,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09651474530831099,
"step": 28000
},
{
"epoch": 2.9768014781359065,
"grad_norm": 0.2768128216266632,
"learning_rate": 0.00047119934169923884,
"loss": 0.0844,
"step": 29000
},
{
"epoch": 2.9768014781359065,
"eval_cer": 0.030805687203791468,
"eval_loss": 0.053941383957862854,
"eval_runtime": 22.2529,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.1063449508489723,
"step": 29000
},
{
"epoch": 3.079449804968179,
"grad_norm": 0.21525971591472626,
"learning_rate": 0.00047017074675992593,
"loss": 0.0828,
"step": 30000
},
{
"epoch": 3.079449804968179,
"eval_cer": 0.027843601895734597,
"eval_loss": 0.05354895442724228,
"eval_runtime": 22.123,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0938337801608579,
"step": 30000
},
{
"epoch": 3.1820981318004518,
"grad_norm": 0.36673811078071594,
"learning_rate": 0.000469142151820613,
"loss": 0.0825,
"step": 31000
},
{
"epoch": 3.1820981318004518,
"eval_cer": 0.02843601895734597,
"eval_loss": 0.05313113331794739,
"eval_runtime": 22.1652,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.10008936550491511,
"step": 31000
},
{
"epoch": 3.2847464586327244,
"grad_norm": 0.20910222828388214,
"learning_rate": 0.0004681135568813001,
"loss": 0.0815,
"step": 32000
},
{
"epoch": 3.2847464586327244,
"eval_cer": 0.04191350710900474,
"eval_loss": 0.05280559882521629,
"eval_runtime": 25.447,
"eval_samples_per_second": 3.537,
"eval_steps_per_second": 0.039,
"eval_wer": 0.11349419124218052,
"step": 32000
},
{
"epoch": 3.387394785464997,
"grad_norm": 0.18754634261131287,
"learning_rate": 0.00046708496194198726,
"loss": 0.0815,
"step": 33000
},
{
"epoch": 3.387394785464997,
"eval_cer": 0.029472748815165876,
"eval_loss": 0.05334876477718353,
"eval_runtime": 21.9504,
"eval_samples_per_second": 4.1,
"eval_steps_per_second": 0.046,
"eval_wer": 0.10545129579982127,
"step": 33000
},
{
"epoch": 3.4900431122972697,
"grad_norm": 0.2228628695011139,
"learning_rate": 0.00046605636700267436,
"loss": 0.0821,
"step": 34000
},
{
"epoch": 3.4900431122972697,
"eval_cer": 0.029917061611374408,
"eval_loss": 0.05586351081728935,
"eval_runtime": 21.6771,
"eval_samples_per_second": 4.152,
"eval_steps_per_second": 0.046,
"eval_wer": 0.10545129579982127,
"step": 34000
},
{
"epoch": 3.592691439129542,
"grad_norm": 0.13316944241523743,
"learning_rate": 0.00046502777206336145,
"loss": 0.0813,
"step": 35000
},
{
"epoch": 3.592691439129542,
"eval_cer": 0.029324644549763034,
"eval_loss": 0.05374256148934364,
"eval_runtime": 21.9747,
"eval_samples_per_second": 4.096,
"eval_steps_per_second": 0.046,
"eval_wer": 0.10277033065236818,
"step": 35000
},
{
"epoch": 3.6953397659618146,
"grad_norm": 0.16168580949306488,
"learning_rate": 0.00046399917712404854,
"loss": 0.0812,
"step": 36000
},
{
"epoch": 3.6953397659618146,
"eval_cer": 0.03450829383886256,
"eval_loss": 0.05287105217576027,
"eval_runtime": 24.1708,
"eval_samples_per_second": 3.724,
"eval_steps_per_second": 0.041,
"eval_wer": 0.10008936550491511,
"step": 36000
},
{
"epoch": 3.7979880927940872,
"grad_norm": 0.20095530152320862,
"learning_rate": 0.00046297058218473564,
"loss": 0.0814,
"step": 37000
},
{
"epoch": 3.7979880927940872,
"eval_cer": 0.035545023696682464,
"eval_loss": 0.05389825999736786,
"eval_runtime": 21.7718,
"eval_samples_per_second": 4.134,
"eval_steps_per_second": 0.046,
"eval_wer": 0.10723860589812333,
"step": 37000
},
{
"epoch": 3.90063641962636,
"grad_norm": 0.19599634408950806,
"learning_rate": 0.00046194198724542273,
"loss": 0.0815,
"step": 38000
},
{
"epoch": 3.90063641962636,
"eval_cer": 0.026954976303317536,
"eval_loss": 0.05257488787174225,
"eval_runtime": 22.0572,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09651474530831099,
"step": 38000
},
{
"epoch": 4.0032847464586325,
"grad_norm": 0.15417757630348206,
"learning_rate": 0.0004609133923061098,
"loss": 0.0806,
"step": 39000
},
{
"epoch": 4.0032847464586325,
"eval_cer": 0.027695497630331755,
"eval_loss": 0.05188766494393349,
"eval_runtime": 22.1269,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09830205540661305,
"step": 39000
},
{
"epoch": 4.105933073290905,
"grad_norm": 0.26163730025291443,
"learning_rate": 0.00045988479736679697,
"loss": 0.0794,
"step": 40000
},
{
"epoch": 4.105933073290905,
"eval_cer": 0.025622037914691944,
"eval_loss": 0.048877034336328506,
"eval_runtime": 22.2755,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08847184986595175,
"step": 40000
},
{
"epoch": 4.208581400123178,
"grad_norm": 0.20826220512390137,
"learning_rate": 0.00045885620242748406,
"loss": 0.0785,
"step": 41000
},
{
"epoch": 4.208581400123178,
"eval_cer": 0.025622037914691944,
"eval_loss": 0.05260869115591049,
"eval_runtime": 22.0992,
"eval_samples_per_second": 4.073,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09204647006255585,
"step": 41000
},
{
"epoch": 4.3112297269554505,
"grad_norm": 0.3389241695404053,
"learning_rate": 0.00045782760748817116,
"loss": 0.0782,
"step": 42000
},
{
"epoch": 4.3112297269554505,
"eval_cer": 0.026954976303317536,
"eval_loss": 0.04903939738869667,
"eval_runtime": 22.5525,
"eval_samples_per_second": 3.991,
"eval_steps_per_second": 0.044,
"eval_wer": 0.09204647006255585,
"step": 42000
},
{
"epoch": 4.413878053787723,
"grad_norm": 0.29210948944091797,
"learning_rate": 0.00045679901254885825,
"loss": 0.0779,
"step": 43000
},
{
"epoch": 4.413878053787723,
"eval_cer": 0.02754739336492891,
"eval_loss": 0.04861212149262428,
"eval_runtime": 22.0013,
"eval_samples_per_second": 4.091,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09204647006255585,
"step": 43000
},
{
"epoch": 4.516526380619996,
"grad_norm": 0.19757212698459625,
"learning_rate": 0.00045577041760954534,
"loss": 0.0786,
"step": 44000
},
{
"epoch": 4.516526380619996,
"eval_cer": 0.023992890995260665,
"eval_loss": 0.04914968088269234,
"eval_runtime": 22.6286,
"eval_samples_per_second": 3.977,
"eval_steps_per_second": 0.044,
"eval_wer": 0.08936550491510277,
"step": 44000
},
{
"epoch": 4.619174707452268,
"grad_norm": 0.23518621921539307,
"learning_rate": 0.00045474182267023244,
"loss": 0.0778,
"step": 45000
},
{
"epoch": 4.619174707452268,
"eval_cer": 0.025770142180094786,
"eval_loss": 0.04797298088669777,
"eval_runtime": 22.0387,
"eval_samples_per_second": 4.084,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08847184986595175,
"step": 45000
},
{
"epoch": 4.721823034284541,
"grad_norm": 0.18045727908611298,
"learning_rate": 0.00045371322773091953,
"loss": 0.0782,
"step": 46000
},
{
"epoch": 4.721823034284541,
"eval_cer": 0.023252369668246446,
"eval_loss": 0.049161382019519806,
"eval_runtime": 22.0981,
"eval_samples_per_second": 4.073,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08579088471849866,
"step": 46000
},
{
"epoch": 4.824471361116814,
"grad_norm": 0.2216973602771759,
"learning_rate": 0.0004526846327916067,
"loss": 0.0778,
"step": 47000
},
{
"epoch": 4.824471361116814,
"eval_cer": 0.027103080568720378,
"eval_loss": 0.04854327812790871,
"eval_runtime": 22.1254,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09204647006255585,
"step": 47000
},
{
"epoch": 4.927119687949086,
"grad_norm": 0.21196693181991577,
"learning_rate": 0.00045165603785229377,
"loss": 0.078,
"step": 48000
},
{
"epoch": 4.927119687949086,
"eval_cer": 0.0254739336492891,
"eval_loss": 0.04801648482680321,
"eval_runtime": 22.3465,
"eval_samples_per_second": 4.027,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08847184986595175,
"step": 48000
},
{
"epoch": 5.029768014781359,
"grad_norm": 0.22312557697296143,
"learning_rate": 0.00045062744291298086,
"loss": 0.0771,
"step": 49000
},
{
"epoch": 5.029768014781359,
"eval_cer": 0.027843601895734597,
"eval_loss": 0.04988682270050049,
"eval_runtime": 22.2617,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0938337801608579,
"step": 49000
},
{
"epoch": 5.132416341613632,
"grad_norm": 0.22252117097377777,
"learning_rate": 0.00044959884797366796,
"loss": 0.0753,
"step": 50000
},
{
"epoch": 5.132416341613632,
"eval_cer": 0.024140995260663507,
"eval_loss": 0.04682554677128792,
"eval_runtime": 22.1777,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08936550491510277,
"step": 50000
},
{
"epoch": 5.235064668445904,
"grad_norm": 0.15847323834896088,
"learning_rate": 0.00044857025303435505,
"loss": 0.0756,
"step": 51000
},
{
"epoch": 5.235064668445904,
"eval_cer": 0.02502962085308057,
"eval_loss": 0.047456566244363785,
"eval_runtime": 22.3306,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08847184986595175,
"step": 51000
},
{
"epoch": 5.337712995278177,
"grad_norm": 0.2008858174085617,
"learning_rate": 0.00044754165809504214,
"loss": 0.0763,
"step": 52000
},
{
"epoch": 5.337712995278177,
"eval_cer": 0.021178909952606635,
"eval_loss": 0.0479045994579792,
"eval_runtime": 22.2995,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0777479892761394,
"step": 52000
},
{
"epoch": 5.44036132211045,
"grad_norm": 0.31161248683929443,
"learning_rate": 0.00044651306315572923,
"loss": 0.0761,
"step": 53000
},
{
"epoch": 5.44036132211045,
"eval_cer": 0.026066350710900472,
"eval_loss": 0.047916192561388016,
"eval_runtime": 22.0152,
"eval_samples_per_second": 4.088,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08847184986595175,
"step": 53000
},
{
"epoch": 5.543009648942722,
"grad_norm": 0.19102248549461365,
"learning_rate": 0.0004454844682164164,
"loss": 0.0756,
"step": 54000
},
{
"epoch": 5.543009648942722,
"eval_cer": 0.023252369668246446,
"eval_loss": 0.04646703228354454,
"eval_runtime": 22.0815,
"eval_samples_per_second": 4.076,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08668453976764968,
"step": 54000
},
{
"epoch": 5.645657975774995,
"grad_norm": 0.3801836669445038,
"learning_rate": 0.0004444558732771035,
"loss": 0.0758,
"step": 55000
},
{
"epoch": 5.645657975774995,
"eval_cer": 0.025770142180094786,
"eval_loss": 0.04690609872341156,
"eval_runtime": 22.1797,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09294012511170688,
"step": 55000
},
{
"epoch": 5.748306302607268,
"grad_norm": 0.24007199704647064,
"learning_rate": 0.00044342727833779057,
"loss": 0.0751,
"step": 56000
},
{
"epoch": 5.748306302607268,
"eval_cer": 0.026066350710900472,
"eval_loss": 0.04649204760789871,
"eval_runtime": 22.2606,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0902591599642538,
"step": 56000
},
{
"epoch": 5.85095462943954,
"grad_norm": 0.177778959274292,
"learning_rate": 0.00044239868339847766,
"loss": 0.0751,
"step": 57000
},
{
"epoch": 5.85095462943954,
"eval_cer": 0.02665876777251185,
"eval_loss": 0.04591357707977295,
"eval_runtime": 22.0753,
"eval_samples_per_second": 4.077,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09294012511170688,
"step": 57000
},
{
"epoch": 5.953602956271813,
"grad_norm": 0.14689143002033234,
"learning_rate": 0.00044137008845916475,
"loss": 0.0754,
"step": 58000
},
{
"epoch": 5.953602956271813,
"eval_cer": 0.025622037914691944,
"eval_loss": 0.04754678159952164,
"eval_runtime": 22.1351,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.045,
"eval_wer": 0.09294012511170688,
"step": 58000
},
{
"epoch": 6.056251283104086,
"grad_norm": 0.1848069429397583,
"learning_rate": 0.00044034149351985185,
"loss": 0.0738,
"step": 59000
},
{
"epoch": 6.056251283104086,
"eval_cer": 0.027103080568720378,
"eval_loss": 0.04577142372727394,
"eval_runtime": 22.1342,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08936550491510277,
"step": 59000
},
{
"epoch": 6.158899609936358,
"grad_norm": 0.22594769299030304,
"learning_rate": 0.00043931289858053894,
"loss": 0.0735,
"step": 60000
},
{
"epoch": 6.158899609936358,
"eval_cer": 0.025177725118483412,
"eval_loss": 0.04629523307085037,
"eval_runtime": 22.2033,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08847184986595175,
"step": 60000
},
{
"epoch": 6.261547936768631,
"grad_norm": 0.20947369933128357,
"learning_rate": 0.0004382843036412261,
"loss": 0.0734,
"step": 61000
},
{
"epoch": 6.261547936768631,
"eval_cer": 0.026510663507109004,
"eval_loss": 0.04853444546461105,
"eval_runtime": 21.9491,
"eval_samples_per_second": 4.1,
"eval_steps_per_second": 0.046,
"eval_wer": 0.09204647006255585,
"step": 61000
},
{
"epoch": 6.3641962636009035,
"grad_norm": 0.2018922120332718,
"learning_rate": 0.0004372557087019132,
"loss": 0.0734,
"step": 62000
},
{
"epoch": 6.3641962636009035,
"eval_cer": 0.022808056872037914,
"eval_loss": 0.04681561887264252,
"eval_runtime": 22.0488,
"eval_samples_per_second": 4.082,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07864164432529044,
"step": 62000
},
{
"epoch": 6.466844590433176,
"grad_norm": 0.2090204358100891,
"learning_rate": 0.0004362271137626003,
"loss": 0.0737,
"step": 63000
},
{
"epoch": 6.466844590433176,
"eval_cer": 0.03865521327014218,
"eval_loss": 0.046171579509973526,
"eval_runtime": 24.3572,
"eval_samples_per_second": 3.695,
"eval_steps_per_second": 0.041,
"eval_wer": 0.09651474530831099,
"step": 63000
},
{
"epoch": 6.569492917265449,
"grad_norm": 0.21769754588603973,
"learning_rate": 0.0004351985188232874,
"loss": 0.0726,
"step": 64000
},
{
"epoch": 6.569492917265449,
"eval_cer": 0.0254739336492891,
"eval_loss": 0.047106679528951645,
"eval_runtime": 21.6323,
"eval_samples_per_second": 4.16,
"eval_steps_per_second": 0.046,
"eval_wer": 0.08936550491510277,
"step": 64000
},
{
"epoch": 6.6721412440977215,
"grad_norm": 0.17357899248600006,
"learning_rate": 0.0004341699238839745,
"loss": 0.0734,
"step": 65000
},
{
"epoch": 6.6721412440977215,
"eval_cer": 0.026214454976303318,
"eval_loss": 0.04763852432370186,
"eval_runtime": 21.7494,
"eval_samples_per_second": 4.138,
"eval_steps_per_second": 0.046,
"eval_wer": 0.08757819481680071,
"step": 65000
},
{
"epoch": 6.774789570929994,
"grad_norm": 0.23694564402103424,
"learning_rate": 0.0004331413289446616,
"loss": 0.0737,
"step": 66000
},
{
"epoch": 6.774789570929994,
"eval_cer": 0.02384478672985782,
"eval_loss": 0.04740356281399727,
"eval_runtime": 21.8747,
"eval_samples_per_second": 4.114,
"eval_steps_per_second": 0.046,
"eval_wer": 0.0840035746201966,
"step": 66000
},
{
"epoch": 6.877437897762267,
"grad_norm": 0.24957220256328583,
"learning_rate": 0.00043211273400534876,
"loss": 0.0725,
"step": 67000
},
{
"epoch": 6.877437897762267,
"eval_cer": 0.023548578199052133,
"eval_loss": 0.04718530550599098,
"eval_runtime": 22.1234,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08132260947274352,
"step": 67000
},
{
"epoch": 6.980086224594539,
"grad_norm": 0.20797890424728394,
"learning_rate": 0.00043108413906603585,
"loss": 0.0728,
"step": 68000
},
{
"epoch": 6.980086224594539,
"eval_cer": 0.02384478672985782,
"eval_loss": 0.0460374690592289,
"eval_runtime": 22.126,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08042895442359249,
"step": 68000
},
{
"epoch": 7.082734551426812,
"grad_norm": 0.21543003618717194,
"learning_rate": 0.00043005554412672294,
"loss": 0.0714,
"step": 69000
},
{
"epoch": 7.082734551426812,
"eval_cer": 0.024881516587677725,
"eval_loss": 0.04654213413596153,
"eval_runtime": 22.1075,
"eval_samples_per_second": 4.071,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08579088471849866,
"step": 69000
},
{
"epoch": 7.185382878259085,
"grad_norm": 0.2328251153230667,
"learning_rate": 0.00042902694918741004,
"loss": 0.0711,
"step": 70000
},
{
"epoch": 7.185382878259085,
"eval_cer": 0.02458530805687204,
"eval_loss": 0.0458548367023468,
"eval_runtime": 22.1559,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08489722966934764,
"step": 70000
},
{
"epoch": 7.288031205091357,
"grad_norm": 0.2861992418766022,
"learning_rate": 0.00042799835424809713,
"loss": 0.0711,
"step": 71000
},
{
"epoch": 7.288031205091357,
"eval_cer": 0.0254739336492891,
"eval_loss": 0.04648789018392563,
"eval_runtime": 22.2028,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08489722966934764,
"step": 71000
},
{
"epoch": 7.39067953192363,
"grad_norm": 0.21449844539165497,
"learning_rate": 0.0004269697593087842,
"loss": 0.0709,
"step": 72000
},
{
"epoch": 7.39067953192363,
"eval_cer": 0.023548578199052133,
"eval_loss": 0.04555143415927887,
"eval_runtime": 22.2316,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0777479892761394,
"step": 72000
},
{
"epoch": 7.493327858755903,
"grad_norm": 0.21393579244613647,
"learning_rate": 0.0004259411643694713,
"loss": 0.0719,
"step": 73000
},
{
"epoch": 7.493327858755903,
"eval_cer": 0.023548578199052133,
"eval_loss": 0.04599784314632416,
"eval_runtime": 22.1857,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0777479892761394,
"step": 73000
},
{
"epoch": 7.5959761855881744,
"grad_norm": 0.26174065470695496,
"learning_rate": 0.00042491256943015846,
"loss": 0.0707,
"step": 74000
},
{
"epoch": 7.5959761855881744,
"eval_cer": 0.026362559241706163,
"eval_loss": 0.046083446592092514,
"eval_runtime": 22.1298,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08668453976764968,
"step": 74000
},
{
"epoch": 7.698624512420448,
"grad_norm": 0.23440520465373993,
"learning_rate": 0.00042388397449084556,
"loss": 0.0707,
"step": 75000
},
{
"epoch": 7.698624512420448,
"eval_cer": 0.02502962085308057,
"eval_loss": 0.046040162444114685,
"eval_runtime": 22.0318,
"eval_samples_per_second": 4.085,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08579088471849866,
"step": 75000
},
{
"epoch": 7.80127283925272,
"grad_norm": 0.210636705160141,
"learning_rate": 0.00042285537955153265,
"loss": 0.0717,
"step": 76000
},
{
"epoch": 7.80127283925272,
"eval_cer": 0.023252369668246446,
"eval_loss": 0.044275738298892975,
"eval_runtime": 22.1574,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08132260947274352,
"step": 76000
},
{
"epoch": 7.903921166084993,
"grad_norm": 0.19636699557304382,
"learning_rate": 0.00042182678461221974,
"loss": 0.0713,
"step": 77000
},
{
"epoch": 7.903921166084993,
"eval_cer": 0.025177725118483412,
"eval_loss": 0.04342404752969742,
"eval_runtime": 22.2249,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08221626452189455,
"step": 77000
},
{
"epoch": 8.006569492917265,
"grad_norm": 0.23213474452495575,
"learning_rate": 0.00042079818967290683,
"loss": 0.0703,
"step": 78000
},
{
"epoch": 8.006569492917265,
"eval_cer": 0.02428909952606635,
"eval_loss": 0.04461972787976265,
"eval_runtime": 22.2476,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08489722966934764,
"step": 78000
},
{
"epoch": 8.109217819749539,
"grad_norm": 0.5032120943069458,
"learning_rate": 0.00041976959473359393,
"loss": 0.0692,
"step": 79000
},
{
"epoch": 8.109217819749539,
"eval_cer": 0.025177725118483412,
"eval_loss": 0.0442008450627327,
"eval_runtime": 22.2279,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08310991957104558,
"step": 79000
},
{
"epoch": 8.21186614658181,
"grad_norm": 0.2267637550830841,
"learning_rate": 0.000418740999794281,
"loss": 0.0694,
"step": 80000
},
{
"epoch": 8.21186614658181,
"eval_cer": 0.02473341232227488,
"eval_loss": 0.041503287851810455,
"eval_runtime": 22.2205,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08310991957104558,
"step": 80000
},
{
"epoch": 8.314514473414084,
"grad_norm": 0.22922959923744202,
"learning_rate": 0.00041771240485496817,
"loss": 0.0698,
"step": 81000
},
{
"epoch": 8.314514473414084,
"eval_cer": 0.02132701421800948,
"eval_loss": 0.0416925847530365,
"eval_runtime": 22.7674,
"eval_samples_per_second": 3.953,
"eval_steps_per_second": 0.044,
"eval_wer": 0.07506702412868632,
"step": 81000
},
{
"epoch": 8.417162800246356,
"grad_norm": 0.2807318866252899,
"learning_rate": 0.00041668380991565526,
"loss": 0.0687,
"step": 82000
},
{
"epoch": 8.417162800246356,
"eval_cer": 0.021475118483412322,
"eval_loss": 0.042673755437135696,
"eval_runtime": 21.9781,
"eval_samples_per_second": 4.095,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07327971403038427,
"step": 82000
},
{
"epoch": 8.51981112707863,
"grad_norm": 0.19773901998996735,
"learning_rate": 0.00041565521497634235,
"loss": 0.0697,
"step": 83000
},
{
"epoch": 8.51981112707863,
"eval_cer": 0.025770142180094786,
"eval_loss": 0.04353732243180275,
"eval_runtime": 21.9415,
"eval_samples_per_second": 4.102,
"eval_steps_per_second": 0.046,
"eval_wer": 0.08668453976764968,
"step": 83000
},
{
"epoch": 8.622459453910901,
"grad_norm": 0.2049770951271057,
"learning_rate": 0.00041462662003702945,
"loss": 0.0689,
"step": 84000
},
{
"epoch": 8.622459453910901,
"eval_cer": 0.02295616113744076,
"eval_loss": 0.044080935418605804,
"eval_runtime": 22.0767,
"eval_samples_per_second": 4.077,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08042895442359249,
"step": 84000
},
{
"epoch": 8.725107780743175,
"grad_norm": 0.19069017469882965,
"learning_rate": 0.00041359802509771654,
"loss": 0.0692,
"step": 85000
},
{
"epoch": 8.725107780743175,
"eval_cer": 0.022363744075829382,
"eval_loss": 0.042362380772829056,
"eval_runtime": 22.0792,
"eval_samples_per_second": 4.076,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07864164432529044,
"step": 85000
},
{
"epoch": 8.827756107575446,
"grad_norm": 0.26284459233283997,
"learning_rate": 0.00041256943015840363,
"loss": 0.0694,
"step": 86000
},
{
"epoch": 8.827756107575446,
"eval_cer": 0.08486374407582939,
"eval_loss": 0.04263956472277641,
"eval_runtime": 39.7879,
"eval_samples_per_second": 2.262,
"eval_steps_per_second": 0.025,
"eval_wer": 0.13762287756925826,
"step": 86000
},
{
"epoch": 8.93040443440772,
"grad_norm": 0.12760530412197113,
"learning_rate": 0.00041154083521909073,
"loss": 0.0691,
"step": 87000
},
{
"epoch": 8.93040443440772,
"eval_cer": 0.02502962085308057,
"eval_loss": 0.043798867613077164,
"eval_runtime": 23.4939,
"eval_samples_per_second": 3.831,
"eval_steps_per_second": 0.043,
"eval_wer": 0.08936550491510277,
"step": 87000
},
{
"epoch": 9.033052761239992,
"grad_norm": 0.3002149760723114,
"learning_rate": 0.0004105122402797779,
"loss": 0.0689,
"step": 88000
},
{
"epoch": 9.033052761239992,
"eval_cer": 0.03036137440758294,
"eval_loss": 0.04081055149435997,
"eval_runtime": 28.0519,
"eval_samples_per_second": 3.208,
"eval_steps_per_second": 0.036,
"eval_wer": 0.08489722966934764,
"step": 88000
},
{
"epoch": 9.135701088072265,
"grad_norm": 0.3071158230304718,
"learning_rate": 0.00040948364534046497,
"loss": 0.0672,
"step": 89000
},
{
"epoch": 9.135701088072265,
"eval_cer": 0.028880331753554502,
"eval_loss": 0.0426529198884964,
"eval_runtime": 21.5055,
"eval_samples_per_second": 4.185,
"eval_steps_per_second": 0.046,
"eval_wer": 0.08042895442359249,
"step": 89000
},
{
"epoch": 9.238349414904537,
"grad_norm": 0.2852329909801483,
"learning_rate": 0.00040845505040115206,
"loss": 0.0672,
"step": 90000
},
{
"epoch": 9.238349414904537,
"eval_cer": 0.023252369668246446,
"eval_loss": 0.04157470539212227,
"eval_runtime": 20.7416,
"eval_samples_per_second": 4.339,
"eval_steps_per_second": 0.048,
"eval_wer": 0.0777479892761394,
"step": 90000
},
{
"epoch": 9.34099774173681,
"grad_norm": 0.2886292338371277,
"learning_rate": 0.00040742645546183915,
"loss": 0.0677,
"step": 91000
},
{
"epoch": 9.34099774173681,
"eval_cer": 0.029768957345971563,
"eval_loss": 0.04205571115016937,
"eval_runtime": 21.8561,
"eval_samples_per_second": 4.118,
"eval_steps_per_second": 0.046,
"eval_wer": 0.08310991957104558,
"step": 91000
},
{
"epoch": 9.443646068569082,
"grad_norm": 0.20502915978431702,
"learning_rate": 0.00040639786052252625,
"loss": 0.068,
"step": 92000
},
{
"epoch": 9.443646068569082,
"eval_cer": 0.050651658767772514,
"eval_loss": 0.04323223605751991,
"eval_runtime": 34.4159,
"eval_samples_per_second": 2.615,
"eval_steps_per_second": 0.029,
"eval_wer": 0.10723860589812333,
"step": 92000
},
{
"epoch": 9.546294395401356,
"grad_norm": 0.31369203329086304,
"learning_rate": 0.00040536926558321334,
"loss": 0.0675,
"step": 93000
},
{
"epoch": 9.546294395401356,
"eval_cer": 0.023548578199052133,
"eval_loss": 0.040974486619234085,
"eval_runtime": 21.5483,
"eval_samples_per_second": 4.177,
"eval_steps_per_second": 0.046,
"eval_wer": 0.08042895442359249,
"step": 93000
},
{
"epoch": 9.648942722233627,
"grad_norm": 0.2164764255285263,
"learning_rate": 0.00040434067064390043,
"loss": 0.0674,
"step": 94000
},
{
"epoch": 9.648942722233627,
"eval_cer": 0.021623222748815167,
"eval_loss": 0.04193877801299095,
"eval_runtime": 20.5784,
"eval_samples_per_second": 4.374,
"eval_steps_per_second": 0.049,
"eval_wer": 0.0741733690795353,
"step": 94000
},
{
"epoch": 9.751591049065901,
"grad_norm": 0.20598456263542175,
"learning_rate": 0.0004033120757045876,
"loss": 0.0683,
"step": 95000
},
{
"epoch": 9.751591049065901,
"eval_cer": 0.021623222748815167,
"eval_loss": 0.04229186475276947,
"eval_runtime": 21.7515,
"eval_samples_per_second": 4.138,
"eval_steps_per_second": 0.046,
"eval_wer": 0.07685433422698838,
"step": 95000
},
{
"epoch": 9.854239375898173,
"grad_norm": 0.17217130959033966,
"learning_rate": 0.0004022834807652747,
"loss": 0.0679,
"step": 96000
},
{
"epoch": 9.854239375898173,
"eval_cer": 0.02221563981042654,
"eval_loss": 0.04353512451052666,
"eval_runtime": 21.856,
"eval_samples_per_second": 4.118,
"eval_steps_per_second": 0.046,
"eval_wer": 0.08042895442359249,
"step": 96000
},
{
"epoch": 9.956887702730446,
"grad_norm": 0.30278000235557556,
"learning_rate": 0.00040125488582596177,
"loss": 0.0676,
"step": 97000
},
{
"epoch": 9.956887702730446,
"eval_cer": 0.021623222748815167,
"eval_loss": 0.04357661306858063,
"eval_runtime": 21.958,
"eval_samples_per_second": 4.099,
"eval_steps_per_second": 0.046,
"eval_wer": 0.07685433422698838,
"step": 97000
},
{
"epoch": 10.059536029562718,
"grad_norm": 0.17858092486858368,
"learning_rate": 0.00040022629088664886,
"loss": 0.0659,
"step": 98000
},
{
"epoch": 10.059536029562718,
"eval_cer": 0.02428909952606635,
"eval_loss": 0.04361404851078987,
"eval_runtime": 22.0796,
"eval_samples_per_second": 4.076,
"eval_steps_per_second": 0.045,
"eval_wer": 0.08221626452189455,
"step": 98000
},
{
"epoch": 10.162184356394992,
"grad_norm": 0.16541603207588196,
"learning_rate": 0.00039919769594733595,
"loss": 0.066,
"step": 99000
},
{
"epoch": 10.162184356394992,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.04267050698399544,
"eval_runtime": 21.9748,
"eval_samples_per_second": 4.096,
"eval_steps_per_second": 0.046,
"eval_wer": 0.07149240393208221,
"step": 99000
},
{
"epoch": 10.264832683227263,
"grad_norm": 0.16135546565055847,
"learning_rate": 0.00039816910100802305,
"loss": 0.066,
"step": 100000
},
{
"epoch": 10.264832683227263,
"eval_cer": 0.02458530805687204,
"eval_loss": 0.043835073709487915,
"eval_runtime": 22.2259,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0840035746201966,
"step": 100000
},
{
"epoch": 10.367481010059535,
"grad_norm": 0.3332918882369995,
"learning_rate": 0.00039714050606871014,
"loss": 0.0667,
"step": 101000
},
{
"epoch": 10.367481010059535,
"eval_cer": 0.021919431279620854,
"eval_loss": 0.04083102196455002,
"eval_runtime": 22.2761,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0777479892761394,
"step": 101000
},
{
"epoch": 10.470129336891809,
"grad_norm": 0.2997848093509674,
"learning_rate": 0.0003961119111293973,
"loss": 0.0659,
"step": 102000
},
{
"epoch": 10.470129336891809,
"eval_cer": 0.021475118483412322,
"eval_loss": 0.04166368022561073,
"eval_runtime": 22.1637,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07327971403038427,
"step": 102000
},
{
"epoch": 10.572777663724082,
"grad_norm": 0.2358590066432953,
"learning_rate": 0.0003950833161900844,
"loss": 0.0661,
"step": 103000
},
{
"epoch": 10.572777663724082,
"eval_cer": 0.018957345971563982,
"eval_loss": 0.04044094681739807,
"eval_runtime": 22.1865,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 103000
},
{
"epoch": 10.675425990556354,
"grad_norm": 0.2147412747144699,
"learning_rate": 0.0003940547212507715,
"loss": 0.0662,
"step": 104000
},
{
"epoch": 10.675425990556354,
"eval_cer": 0.022067535545023696,
"eval_loss": 0.042136672884225845,
"eval_runtime": 22.3442,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0741733690795353,
"step": 104000
},
{
"epoch": 10.778074317388626,
"grad_norm": 0.21466469764709473,
"learning_rate": 0.00039302612631145857,
"loss": 0.066,
"step": 105000
},
{
"epoch": 10.778074317388626,
"eval_cer": 0.020290284360189575,
"eval_loss": 0.04227345809340477,
"eval_runtime": 22.2243,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06881143878462913,
"step": 105000
},
{
"epoch": 10.8807226442209,
"grad_norm": 0.20175763964653015,
"learning_rate": 0.00039199753137214566,
"loss": 0.0665,
"step": 106000
},
{
"epoch": 10.8807226442209,
"eval_cer": 0.021919431279620854,
"eval_loss": 0.04261546581983566,
"eval_runtime": 22.0605,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07327971403038427,
"step": 106000
},
{
"epoch": 10.983370971053171,
"grad_norm": 0.3140750229358673,
"learning_rate": 0.00039096893643283275,
"loss": 0.066,
"step": 107000
},
{
"epoch": 10.983370971053171,
"eval_cer": 0.018957345971563982,
"eval_loss": 0.040347784757614136,
"eval_runtime": 23.0302,
"eval_samples_per_second": 3.908,
"eval_steps_per_second": 0.043,
"eval_wer": 0.06523681858802502,
"step": 107000
},
{
"epoch": 11.086019297885445,
"grad_norm": 0.2863214313983917,
"learning_rate": 0.00038994034149351985,
"loss": 0.0643,
"step": 108000
},
{
"epoch": 11.086019297885445,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.040664974600076675,
"eval_runtime": 22.169,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07149240393208221,
"step": 108000
},
{
"epoch": 11.188667624717716,
"grad_norm": 0.1757292002439499,
"learning_rate": 0.000388911746554207,
"loss": 0.0644,
"step": 109000
},
{
"epoch": 11.188667624717716,
"eval_cer": 0.01762440758293839,
"eval_loss": 0.03967958316206932,
"eval_runtime": 22.6062,
"eval_samples_per_second": 3.981,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06344950848972297,
"step": 109000
},
{
"epoch": 11.29131595154999,
"grad_norm": 0.25020548701286316,
"learning_rate": 0.0003878831516148941,
"loss": 0.0646,
"step": 110000
},
{
"epoch": 11.29131595154999,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.0391419492661953,
"eval_runtime": 21.9075,
"eval_samples_per_second": 4.108,
"eval_steps_per_second": 0.046,
"eval_wer": 0.06881143878462913,
"step": 110000
},
{
"epoch": 11.393964278382262,
"grad_norm": 0.2499699741601944,
"learning_rate": 0.0003868545566755812,
"loss": 0.0643,
"step": 111000
},
{
"epoch": 11.393964278382262,
"eval_cer": 0.02058649289099526,
"eval_loss": 0.04037711024284363,
"eval_runtime": 22.1604,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0741733690795353,
"step": 111000
},
{
"epoch": 11.496612605214535,
"grad_norm": 0.16554132103919983,
"learning_rate": 0.0003858259617362683,
"loss": 0.0649,
"step": 112000
},
{
"epoch": 11.496612605214535,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.04021435230970383,
"eval_runtime": 22.1316,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07327971403038427,
"step": 112000
},
{
"epoch": 11.599260932046807,
"grad_norm": 0.18748946487903595,
"learning_rate": 0.00038479736679695537,
"loss": 0.0655,
"step": 113000
},
{
"epoch": 11.599260932046807,
"eval_cer": 0.022511848341232227,
"eval_loss": 0.040546808391809464,
"eval_runtime": 22.1622,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07596067917783736,
"step": 113000
},
{
"epoch": 11.70190925887908,
"grad_norm": 0.4686923623085022,
"learning_rate": 0.00038376877185764246,
"loss": 0.0647,
"step": 114000
},
{
"epoch": 11.70190925887908,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.04154360666871071,
"eval_runtime": 22.0458,
"eval_samples_per_second": 4.082,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0679177837354781,
"step": 114000
},
{
"epoch": 11.804557585711352,
"grad_norm": 0.20417150855064392,
"learning_rate": 0.00038274017691832955,
"loss": 0.0649,
"step": 115000
},
{
"epoch": 11.804557585711352,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.04089859500527382,
"eval_runtime": 22.0112,
"eval_samples_per_second": 4.089,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06702412868632708,
"step": 115000
},
{
"epoch": 11.907205912543626,
"grad_norm": 0.27801695466041565,
"learning_rate": 0.0003817115819790167,
"loss": 0.0643,
"step": 116000
},
{
"epoch": 11.907205912543626,
"eval_cer": 0.020438388625592416,
"eval_loss": 0.04090258479118347,
"eval_runtime": 22.1223,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 116000
},
{
"epoch": 12.009854239375898,
"grad_norm": 0.24761049449443817,
"learning_rate": 0.0003806829870397038,
"loss": 0.0641,
"step": 117000
},
{
"epoch": 12.009854239375898,
"eval_cer": 0.02103080568720379,
"eval_loss": 0.041068486869335175,
"eval_runtime": 22.0016,
"eval_samples_per_second": 4.091,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0741733690795353,
"step": 117000
},
{
"epoch": 12.112502566208171,
"grad_norm": 0.18434394896030426,
"learning_rate": 0.0003796543921003909,
"loss": 0.0625,
"step": 118000
},
{
"epoch": 12.112502566208171,
"eval_cer": 0.020438388625592416,
"eval_loss": 0.04070517420768738,
"eval_runtime": 22.1605,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07149240393208221,
"step": 118000
},
{
"epoch": 12.215150893040443,
"grad_norm": 0.24979180097579956,
"learning_rate": 0.000378625797161078,
"loss": 0.0629,
"step": 119000
},
{
"epoch": 12.215150893040443,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.038933165371418,
"eval_runtime": 22.1583,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 119000
},
{
"epoch": 12.317799219872716,
"grad_norm": 0.29532870650291443,
"learning_rate": 0.0003775972022217651,
"loss": 0.0634,
"step": 120000
},
{
"epoch": 12.317799219872716,
"eval_cer": 0.020438388625592416,
"eval_loss": 0.04176652058959007,
"eval_runtime": 22.5206,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.07506702412868632,
"step": 120000
},
{
"epoch": 12.420447546704988,
"grad_norm": 0.2764800190925598,
"learning_rate": 0.00037656860728245217,
"loss": 0.0628,
"step": 121000
},
{
"epoch": 12.420447546704988,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.038935501128435135,
"eval_runtime": 22.4914,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06970509383378017,
"step": 121000
},
{
"epoch": 12.523095873537262,
"grad_norm": 0.20742572844028473,
"learning_rate": 0.00037554001234313926,
"loss": 0.0638,
"step": 122000
},
{
"epoch": 12.523095873537262,
"eval_cer": 0.020734597156398103,
"eval_loss": 0.040505990386009216,
"eval_runtime": 22.4563,
"eval_samples_per_second": 4.008,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 122000
},
{
"epoch": 12.625744200369533,
"grad_norm": 0.22677256166934967,
"learning_rate": 0.0003745114174038264,
"loss": 0.063,
"step": 123000
},
{
"epoch": 12.625744200369533,
"eval_cer": 0.02058649289099526,
"eval_loss": 0.0383678562939167,
"eval_runtime": 22.3288,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06881143878462913,
"step": 123000
},
{
"epoch": 12.728392527201807,
"grad_norm": 0.2105027288198471,
"learning_rate": 0.0003734828224645135,
"loss": 0.0633,
"step": 124000
},
{
"epoch": 12.728392527201807,
"eval_cer": 0.01925355450236967,
"eval_loss": 0.03879451006650925,
"eval_runtime": 22.3197,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 124000
},
{
"epoch": 12.831040854034079,
"grad_norm": 0.21093320846557617,
"learning_rate": 0.0003724542275252006,
"loss": 0.0636,
"step": 125000
},
{
"epoch": 12.831040854034079,
"eval_cer": 0.020734597156398103,
"eval_loss": 0.038905300199985504,
"eval_runtime": 22.1791,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06702412868632708,
"step": 125000
},
{
"epoch": 12.933689180866352,
"grad_norm": 0.21501018106937408,
"learning_rate": 0.0003714256325858877,
"loss": 0.0634,
"step": 126000
},
{
"epoch": 12.933689180866352,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.03828004002571106,
"eval_runtime": 22.175,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 126000
},
{
"epoch": 13.036337507698624,
"grad_norm": 0.2869448661804199,
"learning_rate": 0.0003703970376465748,
"loss": 0.0623,
"step": 127000
},
{
"epoch": 13.036337507698624,
"eval_cer": 0.02221563981042654,
"eval_loss": 0.03908955305814743,
"eval_runtime": 22.0973,
"eval_samples_per_second": 4.073,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0741733690795353,
"step": 127000
},
{
"epoch": 13.138985834530898,
"grad_norm": 0.20509861409664154,
"learning_rate": 0.00036936844270726187,
"loss": 0.0616,
"step": 128000
},
{
"epoch": 13.138985834530898,
"eval_cer": 0.021919431279620854,
"eval_loss": 0.0386139452457428,
"eval_runtime": 22.2024,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07149240393208221,
"step": 128000
},
{
"epoch": 13.24163416136317,
"grad_norm": 0.17765522003173828,
"learning_rate": 0.00036833984776794897,
"loss": 0.0618,
"step": 129000
},
{
"epoch": 13.24163416136317,
"eval_cer": 0.01925355450236967,
"eval_loss": 0.0378284677863121,
"eval_runtime": 22.0041,
"eval_samples_per_second": 4.09,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 129000
},
{
"epoch": 13.344282488195443,
"grad_norm": 0.24877817928791046,
"learning_rate": 0.0003673112528286361,
"loss": 0.0619,
"step": 130000
},
{
"epoch": 13.344282488195443,
"eval_cer": 0.021475118483412322,
"eval_loss": 0.03674837946891785,
"eval_runtime": 22.0962,
"eval_samples_per_second": 4.073,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0741733690795353,
"step": 130000
},
{
"epoch": 13.446930815027715,
"grad_norm": 0.25357529520988464,
"learning_rate": 0.0003662826578893232,
"loss": 0.0623,
"step": 131000
},
{
"epoch": 13.446930815027715,
"eval_cer": 0.019994075829383885,
"eval_loss": 0.03821048513054848,
"eval_runtime": 22.1382,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06970509383378017,
"step": 131000
},
{
"epoch": 13.549579141859988,
"grad_norm": 0.32752060890197754,
"learning_rate": 0.0003652540629500103,
"loss": 0.0622,
"step": 132000
},
{
"epoch": 13.549579141859988,
"eval_cer": 0.02014218009478673,
"eval_loss": 0.038477059453725815,
"eval_runtime": 22.2355,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06702412868632708,
"step": 132000
},
{
"epoch": 13.65222746869226,
"grad_norm": 0.22504030168056488,
"learning_rate": 0.0003642254680106974,
"loss": 0.0622,
"step": 133000
},
{
"epoch": 13.65222746869226,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.038305170834064484,
"eval_runtime": 22.2134,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06970509383378017,
"step": 133000
},
{
"epoch": 13.754875795524534,
"grad_norm": 0.23793594539165497,
"learning_rate": 0.0003631968730713845,
"loss": 0.062,
"step": 134000
},
{
"epoch": 13.754875795524534,
"eval_cer": 0.02058649289099526,
"eval_loss": 0.03838730975985527,
"eval_runtime": 22.3142,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0679177837354781,
"step": 134000
},
{
"epoch": 13.857524122356805,
"grad_norm": 0.18214410543441772,
"learning_rate": 0.0003621682781320716,
"loss": 0.0625,
"step": 135000
},
{
"epoch": 13.857524122356805,
"eval_cer": 0.02014218009478673,
"eval_loss": 0.03858475759625435,
"eval_runtime": 22.3822,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06881143878462913,
"step": 135000
},
{
"epoch": 13.960172449189079,
"grad_norm": 0.24008499085903168,
"learning_rate": 0.00036113968319275867,
"loss": 0.0621,
"step": 136000
},
{
"epoch": 13.960172449189079,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.03749372810125351,
"eval_runtime": 22.3573,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06702412868632708,
"step": 136000
},
{
"epoch": 14.06282077602135,
"grad_norm": 0.20032504200935364,
"learning_rate": 0.0003601110882534458,
"loss": 0.0616,
"step": 137000
},
{
"epoch": 14.06282077602135,
"eval_cer": 0.02058649289099526,
"eval_loss": 0.03601989150047302,
"eval_runtime": 22.1524,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06970509383378017,
"step": 137000
},
{
"epoch": 14.165469102853624,
"grad_norm": 0.29345226287841797,
"learning_rate": 0.0003590824933141329,
"loss": 0.0611,
"step": 138000
},
{
"epoch": 14.165469102853624,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.036261823028326035,
"eval_runtime": 22.1903,
"eval_samples_per_second": 4.056,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 138000
},
{
"epoch": 14.268117429685896,
"grad_norm": 0.19695940613746643,
"learning_rate": 0.00035805389837482,
"loss": 0.0606,
"step": 139000
},
{
"epoch": 14.268117429685896,
"eval_cer": 0.021623222748815167,
"eval_loss": 0.0368872731924057,
"eval_runtime": 22.1734,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07238605898123325,
"step": 139000
},
{
"epoch": 14.37076575651817,
"grad_norm": 0.1869696080684662,
"learning_rate": 0.0003570253034355071,
"loss": 0.0609,
"step": 140000
},
{
"epoch": 14.37076575651817,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.036232832819223404,
"eval_runtime": 22.2094,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 140000
},
{
"epoch": 14.473414083350441,
"grad_norm": 0.3025355041027069,
"learning_rate": 0.0003559967084961942,
"loss": 0.0611,
"step": 141000
},
{
"epoch": 14.473414083350441,
"eval_cer": 0.02088270142180095,
"eval_loss": 0.036524925380945206,
"eval_runtime": 22.0892,
"eval_samples_per_second": 4.074,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07149240393208221,
"step": 141000
},
{
"epoch": 14.576062410182715,
"grad_norm": 0.2218203842639923,
"learning_rate": 0.0003549681135568813,
"loss": 0.0612,
"step": 142000
},
{
"epoch": 14.576062410182715,
"eval_cer": 0.02058649289099526,
"eval_loss": 0.03738318383693695,
"eval_runtime": 22.4443,
"eval_samples_per_second": 4.01,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07149240393208221,
"step": 142000
},
{
"epoch": 14.678710737014987,
"grad_norm": 0.18811815977096558,
"learning_rate": 0.0003539395186175684,
"loss": 0.0606,
"step": 143000
},
{
"epoch": 14.678710737014987,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.03684472665190697,
"eval_runtime": 22.3128,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07238605898123325,
"step": 143000
},
{
"epoch": 14.78135906384726,
"grad_norm": 0.20071063935756683,
"learning_rate": 0.0003529109236782555,
"loss": 0.0609,
"step": 144000
},
{
"epoch": 14.78135906384726,
"eval_cer": 0.018364928909952605,
"eval_loss": 0.037738535553216934,
"eval_runtime": 22.5071,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.064343163538874,
"step": 144000
},
{
"epoch": 14.884007390679532,
"grad_norm": 0.2046099305152893,
"learning_rate": 0.0003518823287389426,
"loss": 0.0614,
"step": 145000
},
{
"epoch": 14.884007390679532,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.03707651048898697,
"eval_runtime": 22.4459,
"eval_samples_per_second": 4.01,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 145000
},
{
"epoch": 14.986655717511805,
"grad_norm": 0.27886244654655457,
"learning_rate": 0.0003508537337996297,
"loss": 0.0616,
"step": 146000
},
{
"epoch": 14.986655717511805,
"eval_cer": 0.022511848341232227,
"eval_loss": 0.038485873490571976,
"eval_runtime": 22.2607,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07685433422698838,
"step": 146000
},
{
"epoch": 15.089304044344077,
"grad_norm": 0.3120444118976593,
"learning_rate": 0.0003498251388603168,
"loss": 0.0598,
"step": 147000
},
{
"epoch": 15.089304044344077,
"eval_cer": 0.021475118483412322,
"eval_loss": 0.03692319989204407,
"eval_runtime": 22.2577,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07685433422698838,
"step": 147000
},
{
"epoch": 15.19195237117635,
"grad_norm": 0.2510223388671875,
"learning_rate": 0.0003487965439210039,
"loss": 0.06,
"step": 148000
},
{
"epoch": 15.19195237117635,
"eval_cer": 0.019549763033175356,
"eval_loss": 0.036081377416849136,
"eval_runtime": 22.3708,
"eval_samples_per_second": 4.023,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06881143878462913,
"step": 148000
},
{
"epoch": 15.294600698008622,
"grad_norm": 0.2539554536342621,
"learning_rate": 0.000347767948981691,
"loss": 0.0603,
"step": 149000
},
{
"epoch": 15.294600698008622,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.036284659057855606,
"eval_runtime": 22.2203,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 149000
},
{
"epoch": 15.397249024840896,
"grad_norm": 0.3537413775920868,
"learning_rate": 0.0003467393540423781,
"loss": 0.0602,
"step": 150000
},
{
"epoch": 15.397249024840896,
"eval_cer": 0.01925355450236967,
"eval_loss": 0.037329014390707016,
"eval_runtime": 22.3303,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0679177837354781,
"step": 150000
},
{
"epoch": 15.499897351673168,
"grad_norm": 0.31098708510398865,
"learning_rate": 0.00034571075910306523,
"loss": 0.0599,
"step": 151000
},
{
"epoch": 15.499897351673168,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.0374312698841095,
"eval_runtime": 22.4269,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 151000
},
{
"epoch": 15.60254567850544,
"grad_norm": 0.18067589402198792,
"learning_rate": 0.0003446821641637523,
"loss": 0.0603,
"step": 152000
},
{
"epoch": 15.60254567850544,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.03638828173279762,
"eval_runtime": 22.5221,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06344950848972297,
"step": 152000
},
{
"epoch": 15.705194005337713,
"grad_norm": 0.26867741346359253,
"learning_rate": 0.0003436535692244394,
"loss": 0.0605,
"step": 153000
},
{
"epoch": 15.705194005337713,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.03720884397625923,
"eval_runtime": 22.3098,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 153000
},
{
"epoch": 15.807842332169985,
"grad_norm": 0.18743161857128143,
"learning_rate": 0.0003426249742851265,
"loss": 0.06,
"step": 154000
},
{
"epoch": 15.807842332169985,
"eval_cer": 0.021919431279620854,
"eval_loss": 0.03767675533890724,
"eval_runtime": 22.2871,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0741733690795353,
"step": 154000
},
{
"epoch": 15.910490659002258,
"grad_norm": 0.2153768688440323,
"learning_rate": 0.0003415963793458136,
"loss": 0.0599,
"step": 155000
},
{
"epoch": 15.910490659002258,
"eval_cer": 0.020290284360189575,
"eval_loss": 0.03886905685067177,
"eval_runtime": 22.1149,
"eval_samples_per_second": 4.07,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07149240393208221,
"step": 155000
},
{
"epoch": 16.01313898583453,
"grad_norm": 0.24325600266456604,
"learning_rate": 0.0003405677844065007,
"loss": 0.0598,
"step": 156000
},
{
"epoch": 16.01313898583453,
"eval_cer": 0.02177132701421801,
"eval_loss": 0.03822890669107437,
"eval_runtime": 22.2699,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 156000
},
{
"epoch": 16.115787312666804,
"grad_norm": 0.2438814342021942,
"learning_rate": 0.0003395391894671878,
"loss": 0.0586,
"step": 157000
},
{
"epoch": 16.115787312666804,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.038069114089012146,
"eval_runtime": 22.2427,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06970509383378017,
"step": 157000
},
{
"epoch": 16.218435639499077,
"grad_norm": 0.22072641551494598,
"learning_rate": 0.00033851059452787494,
"loss": 0.0584,
"step": 158000
},
{
"epoch": 16.218435639499077,
"eval_cer": 0.0173281990521327,
"eval_loss": 0.03663622587919235,
"eval_runtime": 22.2026,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 158000
},
{
"epoch": 16.321083966331347,
"grad_norm": 0.15790699422359467,
"learning_rate": 0.00033748199958856203,
"loss": 0.0587,
"step": 159000
},
{
"epoch": 16.321083966331347,
"eval_cer": 0.019549763033175356,
"eval_loss": 0.03735322132706642,
"eval_runtime": 22.315,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06702412868632708,
"step": 159000
},
{
"epoch": 16.42373229316362,
"grad_norm": 0.1823723465204239,
"learning_rate": 0.0003364534046492491,
"loss": 0.0591,
"step": 160000
},
{
"epoch": 16.42373229316362,
"eval_cer": 0.021178909952606635,
"eval_loss": 0.03700649365782738,
"eval_runtime": 22.6895,
"eval_samples_per_second": 3.967,
"eval_steps_per_second": 0.044,
"eval_wer": 0.07059874888293119,
"step": 160000
},
{
"epoch": 16.526380619995894,
"grad_norm": 0.24684032797813416,
"learning_rate": 0.0003354248097099362,
"loss": 0.0595,
"step": 161000
},
{
"epoch": 16.526380619995894,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.03638649359345436,
"eval_runtime": 22.3004,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 161000
},
{
"epoch": 16.629028946828168,
"grad_norm": 0.2088789939880371,
"learning_rate": 0.0003343962147706233,
"loss": 0.059,
"step": 162000
},
{
"epoch": 16.629028946828168,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03632904216647148,
"eval_runtime": 22.2253,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 162000
},
{
"epoch": 16.731677273660438,
"grad_norm": 0.20426543056964874,
"learning_rate": 0.0003333676198313104,
"loss": 0.0598,
"step": 163000
},
{
"epoch": 16.731677273660438,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.03629469498991966,
"eval_runtime": 22.3453,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 163000
},
{
"epoch": 16.83432560049271,
"grad_norm": 0.2737009525299072,
"learning_rate": 0.0003323390248919975,
"loss": 0.0591,
"step": 164000
},
{
"epoch": 16.83432560049271,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03445509076118469,
"eval_runtime": 22.3887,
"eval_samples_per_second": 4.02,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 164000
},
{
"epoch": 16.936973927324985,
"grad_norm": 0.33581215143203735,
"learning_rate": 0.00033131042995268465,
"loss": 0.0586,
"step": 165000
},
{
"epoch": 16.936973927324985,
"eval_cer": 0.020290284360189575,
"eval_loss": 0.036238256841897964,
"eval_runtime": 22.1998,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06970509383378017,
"step": 165000
},
{
"epoch": 17.03962225415726,
"grad_norm": 0.277771532535553,
"learning_rate": 0.00033028183501337174,
"loss": 0.0591,
"step": 166000
},
{
"epoch": 17.03962225415726,
"eval_cer": 0.01806872037914692,
"eval_loss": 0.035338886082172394,
"eval_runtime": 22.132,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 166000
},
{
"epoch": 17.14227058098953,
"grad_norm": 0.17568770051002502,
"learning_rate": 0.00032925324007405883,
"loss": 0.0578,
"step": 167000
},
{
"epoch": 17.14227058098953,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03632746636867523,
"eval_runtime": 22.2554,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 167000
},
{
"epoch": 17.244918907821802,
"grad_norm": 0.22311237454414368,
"learning_rate": 0.0003282246451347459,
"loss": 0.0576,
"step": 168000
},
{
"epoch": 17.244918907821802,
"eval_cer": 0.01925355450236967,
"eval_loss": 0.036676324903964996,
"eval_runtime": 22.484,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06702412868632708,
"step": 168000
},
{
"epoch": 17.347567234654075,
"grad_norm": 0.3649640381336212,
"learning_rate": 0.000327196050195433,
"loss": 0.0576,
"step": 169000
},
{
"epoch": 17.347567234654075,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.035018790513277054,
"eval_runtime": 22.2563,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 169000
},
{
"epoch": 17.45021556148635,
"grad_norm": 0.2809694707393646,
"learning_rate": 0.0003261674552561201,
"loss": 0.0582,
"step": 170000
},
{
"epoch": 17.45021556148635,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.035496581345796585,
"eval_runtime": 22.1719,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 170000
},
{
"epoch": 17.55286388831862,
"grad_norm": 0.23279882967472076,
"learning_rate": 0.0003251388603168072,
"loss": 0.0587,
"step": 171000
},
{
"epoch": 17.55286388831862,
"eval_cer": 0.018364928909952605,
"eval_loss": 0.03641456365585327,
"eval_runtime": 22.1562,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 171000
},
{
"epoch": 17.655512215150893,
"grad_norm": 0.23531590402126312,
"learning_rate": 0.00032411026537749435,
"loss": 0.0587,
"step": 172000
},
{
"epoch": 17.655512215150893,
"eval_cer": 0.018216824644549764,
"eval_loss": 0.034297019243240356,
"eval_runtime": 22.3222,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 172000
},
{
"epoch": 17.758160541983166,
"grad_norm": 0.38004380464553833,
"learning_rate": 0.00032308167043818144,
"loss": 0.0589,
"step": 173000
},
{
"epoch": 17.758160541983166,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03497479110956192,
"eval_runtime": 22.1444,
"eval_samples_per_second": 4.064,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 173000
},
{
"epoch": 17.86080886881544,
"grad_norm": 0.22771824896335602,
"learning_rate": 0.00032205307549886854,
"loss": 0.0578,
"step": 174000
},
{
"epoch": 17.86080886881544,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.034568045288324356,
"eval_runtime": 22.2545,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 174000
},
{
"epoch": 17.96345719564771,
"grad_norm": 0.3113113343715668,
"learning_rate": 0.00032102448055955563,
"loss": 0.0586,
"step": 175000
},
{
"epoch": 17.96345719564771,
"eval_cer": 0.01851303317535545,
"eval_loss": 0.035004787147045135,
"eval_runtime": 22.3269,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 175000
},
{
"epoch": 18.066105522479983,
"grad_norm": 0.24952055513858795,
"learning_rate": 0.0003199958856202427,
"loss": 0.0571,
"step": 176000
},
{
"epoch": 18.066105522479983,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.03602117672562599,
"eval_runtime": 22.4032,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 176000
},
{
"epoch": 18.168753849312257,
"grad_norm": 0.33917441964149475,
"learning_rate": 0.0003189672906809298,
"loss": 0.0569,
"step": 177000
},
{
"epoch": 18.168753849312257,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.036349765956401825,
"eval_runtime": 22.273,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 177000
},
{
"epoch": 18.27140217614453,
"grad_norm": 0.18371905386447906,
"learning_rate": 0.0003179386957416169,
"loss": 0.0566,
"step": 178000
},
{
"epoch": 18.27140217614453,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.03550698608160019,
"eval_runtime": 22.3312,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 178000
},
{
"epoch": 18.3740505029768,
"grad_norm": 0.2601664662361145,
"learning_rate": 0.00031691010080230406,
"loss": 0.057,
"step": 179000
},
{
"epoch": 18.3740505029768,
"eval_cer": 0.018957345971563982,
"eval_loss": 0.035991400480270386,
"eval_runtime": 22.2432,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 179000
},
{
"epoch": 18.476698829809074,
"grad_norm": 0.17889824509620667,
"learning_rate": 0.00031588150586299115,
"loss": 0.057,
"step": 180000
},
{
"epoch": 18.476698829809074,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.03573347255587578,
"eval_runtime": 22.2321,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 180000
},
{
"epoch": 18.579347156641347,
"grad_norm": 0.5736141800880432,
"learning_rate": 0.00031485291092367824,
"loss": 0.0578,
"step": 181000
},
{
"epoch": 18.579347156641347,
"eval_cer": 0.01851303317535545,
"eval_loss": 0.035990502685308456,
"eval_runtime": 22.2434,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 181000
},
{
"epoch": 18.68199548347362,
"grad_norm": 0.2320089340209961,
"learning_rate": 0.00031382431598436534,
"loss": 0.0581,
"step": 182000
},
{
"epoch": 18.68199548347362,
"eval_cer": 0.0173281990521327,
"eval_loss": 0.035308029502630234,
"eval_runtime": 22.161,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 182000
},
{
"epoch": 18.78464381030589,
"grad_norm": 0.31181567907333374,
"learning_rate": 0.00031279572104505243,
"loss": 0.0575,
"step": 183000
},
{
"epoch": 18.78464381030589,
"eval_cer": 0.017920616113744077,
"eval_loss": 0.03400159254670143,
"eval_runtime": 22.1991,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 183000
},
{
"epoch": 18.887292137138164,
"grad_norm": 0.26530441641807556,
"learning_rate": 0.0003117671261057395,
"loss": 0.0574,
"step": 184000
},
{
"epoch": 18.887292137138164,
"eval_cer": 0.020290284360189575,
"eval_loss": 0.03510544076561928,
"eval_runtime": 22.2589,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 184000
},
{
"epoch": 18.989940463970438,
"grad_norm": 0.2680375576019287,
"learning_rate": 0.0003107385311664266,
"loss": 0.0576,
"step": 185000
},
{
"epoch": 18.989940463970438,
"eval_cer": 0.021178909952606635,
"eval_loss": 0.037547577172517776,
"eval_runtime": 22.2321,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07238605898123325,
"step": 185000
},
{
"epoch": 19.09258879080271,
"grad_norm": 0.2254854440689087,
"learning_rate": 0.00030970993622711376,
"loss": 0.0561,
"step": 186000
},
{
"epoch": 19.09258879080271,
"eval_cer": 0.01940165876777251,
"eval_loss": 0.03498771786689758,
"eval_runtime": 22.2675,
"eval_samples_per_second": 4.042,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 186000
},
{
"epoch": 19.19523711763498,
"grad_norm": 0.2187499850988388,
"learning_rate": 0.00030868134128780086,
"loss": 0.0563,
"step": 187000
},
{
"epoch": 19.19523711763498,
"eval_cer": 0.020734597156398103,
"eval_loss": 0.035063955932855606,
"eval_runtime": 22.2231,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 187000
},
{
"epoch": 19.297885444467255,
"grad_norm": 0.3217693567276001,
"learning_rate": 0.000307652746348488,
"loss": 0.0566,
"step": 188000
},
{
"epoch": 19.297885444467255,
"eval_cer": 0.020734597156398103,
"eval_loss": 0.034946467727422714,
"eval_runtime": 22.2783,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0679177837354781,
"step": 188000
},
{
"epoch": 19.40053377129953,
"grad_norm": 0.2570216655731201,
"learning_rate": 0.0003066241514091751,
"loss": 0.056,
"step": 189000
},
{
"epoch": 19.40053377129953,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.03498660773038864,
"eval_runtime": 22.3189,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0679177837354781,
"step": 189000
},
{
"epoch": 19.503182098131802,
"grad_norm": 0.24969050288200378,
"learning_rate": 0.0003055955564698622,
"loss": 0.0565,
"step": 190000
},
{
"epoch": 19.503182098131802,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.034704625606536865,
"eval_runtime": 22.2267,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 190000
},
{
"epoch": 19.605830424964072,
"grad_norm": 0.19874796271324158,
"learning_rate": 0.0003045669615305493,
"loss": 0.0565,
"step": 191000
},
{
"epoch": 19.605830424964072,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03414672613143921,
"eval_runtime": 22.4133,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 191000
},
{
"epoch": 19.708478751796346,
"grad_norm": 0.21598631143569946,
"learning_rate": 0.0003035383665912364,
"loss": 0.0569,
"step": 192000
},
{
"epoch": 19.708478751796346,
"eval_cer": 0.02088270142180095,
"eval_loss": 0.03441624715924263,
"eval_runtime": 22.4978,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0679177837354781,
"step": 192000
},
{
"epoch": 19.81112707862862,
"grad_norm": 0.740821361541748,
"learning_rate": 0.0003025097716519235,
"loss": 0.0566,
"step": 193000
},
{
"epoch": 19.81112707862862,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.03505128249526024,
"eval_runtime": 22.2297,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 193000
},
{
"epoch": 19.913775405460893,
"grad_norm": 0.25967320799827576,
"learning_rate": 0.0003014811767126106,
"loss": 0.0566,
"step": 194000
},
{
"epoch": 19.913775405460893,
"eval_cer": 0.02088270142180095,
"eval_loss": 0.03540065139532089,
"eval_runtime": 22.328,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07327971403038427,
"step": 194000
},
{
"epoch": 20.016423732293163,
"grad_norm": 0.2352762520313263,
"learning_rate": 0.0003004525817732977,
"loss": 0.0563,
"step": 195000
},
{
"epoch": 20.016423732293163,
"eval_cer": 0.021178909952606635,
"eval_loss": 0.0357496440410614,
"eval_runtime": 22.3495,
"eval_samples_per_second": 4.027,
"eval_steps_per_second": 0.045,
"eval_wer": 0.07059874888293119,
"step": 195000
},
{
"epoch": 20.119072059125436,
"grad_norm": 0.31662923097610474,
"learning_rate": 0.0002994239868339848,
"loss": 0.0556,
"step": 196000
},
{
"epoch": 20.119072059125436,
"eval_cer": 0.019697867298578198,
"eval_loss": 0.03499302640557289,
"eval_runtime": 22.3332,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 196000
},
{
"epoch": 20.22172038595771,
"grad_norm": 0.16036640107631683,
"learning_rate": 0.0002983953918946719,
"loss": 0.0556,
"step": 197000
},
{
"epoch": 20.22172038595771,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.03571586683392525,
"eval_runtime": 22.3554,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06970509383378017,
"step": 197000
},
{
"epoch": 20.324368712789983,
"grad_norm": 0.18146023154258728,
"learning_rate": 0.000297366796955359,
"loss": 0.0556,
"step": 198000
},
{
"epoch": 20.324368712789983,
"eval_cer": 0.01851303317535545,
"eval_loss": 0.03470243141055107,
"eval_runtime": 22.2669,
"eval_samples_per_second": 4.042,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 198000
},
{
"epoch": 20.427017039622253,
"grad_norm": 0.18593771755695343,
"learning_rate": 0.0002963382020160461,
"loss": 0.0553,
"step": 199000
},
{
"epoch": 20.427017039622253,
"eval_cer": 0.01762440758293839,
"eval_loss": 0.034099601209163666,
"eval_runtime": 22.1758,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 199000
},
{
"epoch": 20.529665366454527,
"grad_norm": 0.16802427172660828,
"learning_rate": 0.00029530960707673323,
"loss": 0.0558,
"step": 200000
},
{
"epoch": 20.529665366454527,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.034865912050008774,
"eval_runtime": 22.2605,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 200000
},
{
"epoch": 20.6323136932868,
"grad_norm": 0.20501871407032013,
"learning_rate": 0.0002942810121374203,
"loss": 0.0559,
"step": 201000
},
{
"epoch": 20.6323136932868,
"eval_cer": 0.018957345971563982,
"eval_loss": 0.03544703871011734,
"eval_runtime": 22.0876,
"eval_samples_per_second": 4.075,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 201000
},
{
"epoch": 20.73496202011907,
"grad_norm": 0.19251494109630585,
"learning_rate": 0.0002932524171981074,
"loss": 0.0555,
"step": 202000
},
{
"epoch": 20.73496202011907,
"eval_cer": 0.01851303317535545,
"eval_loss": 0.03462394326925278,
"eval_runtime": 22.1969,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 202000
},
{
"epoch": 20.837610346951344,
"grad_norm": 0.12611526250839233,
"learning_rate": 0.0002922238222587945,
"loss": 0.0558,
"step": 203000
},
{
"epoch": 20.837610346951344,
"eval_cer": 0.017920616113744077,
"eval_loss": 0.03471866995096207,
"eval_runtime": 22.9419,
"eval_samples_per_second": 3.923,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06344950848972297,
"step": 203000
},
{
"epoch": 20.940258673783617,
"grad_norm": 0.23571810126304626,
"learning_rate": 0.0002911952273194816,
"loss": 0.0553,
"step": 204000
},
{
"epoch": 20.940258673783617,
"eval_cer": 0.016735781990521326,
"eval_loss": 0.033052537590265274,
"eval_runtime": 22.2403,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 204000
},
{
"epoch": 21.04290700061589,
"grad_norm": 0.16654832661151886,
"learning_rate": 0.0002901666323801687,
"loss": 0.0552,
"step": 205000
},
{
"epoch": 21.04290700061589,
"eval_cer": 0.017920616113744077,
"eval_loss": 0.03388543054461479,
"eval_runtime": 22.3538,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 205000
},
{
"epoch": 21.14555532744816,
"grad_norm": 0.30480700731277466,
"learning_rate": 0.0002891380374408558,
"loss": 0.054,
"step": 206000
},
{
"epoch": 21.14555532744816,
"eval_cer": 0.017180094786729858,
"eval_loss": 0.03281432017683983,
"eval_runtime": 22.4896,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05540661304736372,
"step": 206000
},
{
"epoch": 21.248203654280434,
"grad_norm": 0.22593766450881958,
"learning_rate": 0.00028810944250154294,
"loss": 0.0544,
"step": 207000
},
{
"epoch": 21.248203654280434,
"eval_cer": 0.017920616113744077,
"eval_loss": 0.03340643644332886,
"eval_runtime": 22.4028,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 207000
},
{
"epoch": 21.350851981112708,
"grad_norm": 0.1935175508260727,
"learning_rate": 0.00028708084756223003,
"loss": 0.0549,
"step": 208000
},
{
"epoch": 21.350851981112708,
"eval_cer": 0.018216824644549764,
"eval_loss": 0.03223665431141853,
"eval_runtime": 22.4082,
"eval_samples_per_second": 4.016,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 208000
},
{
"epoch": 21.45350030794498,
"grad_norm": 0.18650276958942413,
"learning_rate": 0.0002860522526229171,
"loss": 0.0549,
"step": 209000
},
{
"epoch": 21.45350030794498,
"eval_cer": 0.01762440758293839,
"eval_loss": 0.0338866226375103,
"eval_runtime": 22.2564,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 209000
},
{
"epoch": 21.55614863477725,
"grad_norm": 0.2135065495967865,
"learning_rate": 0.0002850236576836042,
"loss": 0.055,
"step": 210000
},
{
"epoch": 21.55614863477725,
"eval_cer": 0.019845971563981043,
"eval_loss": 0.033525411039590836,
"eval_runtime": 22.1326,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 210000
},
{
"epoch": 21.658796961609525,
"grad_norm": 0.21206118166446686,
"learning_rate": 0.0002839950627442913,
"loss": 0.0547,
"step": 211000
},
{
"epoch": 21.658796961609525,
"eval_cer": 0.01940165876777251,
"eval_loss": 0.033743493258953094,
"eval_runtime": 22.1212,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 211000
},
{
"epoch": 21.7614452884418,
"grad_norm": 0.41899242997169495,
"learning_rate": 0.0002829664678049784,
"loss": 0.0551,
"step": 212000
},
{
"epoch": 21.7614452884418,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.03284008055925369,
"eval_runtime": 22.3424,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 212000
},
{
"epoch": 21.864093615274072,
"grad_norm": 0.16063542664051056,
"learning_rate": 0.0002819378728656655,
"loss": 0.0547,
"step": 213000
},
{
"epoch": 21.864093615274072,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.03267466276884079,
"eval_runtime": 22.247,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 213000
},
{
"epoch": 21.966741942106342,
"grad_norm": 0.3092317283153534,
"learning_rate": 0.00028090927792635264,
"loss": 0.055,
"step": 214000
},
{
"epoch": 21.966741942106342,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.032835327088832855,
"eval_runtime": 22.1659,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 214000
},
{
"epoch": 22.069390268938616,
"grad_norm": 0.21150179207324982,
"learning_rate": 0.00027988068298703974,
"loss": 0.0544,
"step": 215000
},
{
"epoch": 22.069390268938616,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.03344175964593887,
"eval_runtime": 22.2595,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 215000
},
{
"epoch": 22.17203859577089,
"grad_norm": 0.7867759466171265,
"learning_rate": 0.00027885208804772683,
"loss": 0.0535,
"step": 216000
},
{
"epoch": 22.17203859577089,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.03438210114836693,
"eval_runtime": 22.4442,
"eval_samples_per_second": 4.01,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 216000
},
{
"epoch": 22.274686922603163,
"grad_norm": 0.37553030252456665,
"learning_rate": 0.0002778234931084139,
"loss": 0.0546,
"step": 217000
},
{
"epoch": 22.274686922603163,
"eval_cer": 0.018216824644549764,
"eval_loss": 0.033362455666065216,
"eval_runtime": 22.2715,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 217000
},
{
"epoch": 22.377335249435433,
"grad_norm": 0.29263338446617126,
"learning_rate": 0.000276794898169101,
"loss": 0.0537,
"step": 218000
},
{
"epoch": 22.377335249435433,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03228195384144783,
"eval_runtime": 22.3277,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 218000
},
{
"epoch": 22.479983576267706,
"grad_norm": 0.25038591027259827,
"learning_rate": 0.0002757663032297881,
"loss": 0.0533,
"step": 219000
},
{
"epoch": 22.479983576267706,
"eval_cer": 0.018957345971563982,
"eval_loss": 0.033720944076776505,
"eval_runtime": 22.5305,
"eval_samples_per_second": 3.995,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06702412868632708,
"step": 219000
},
{
"epoch": 22.58263190309998,
"grad_norm": 0.1750280261039734,
"learning_rate": 0.0002747377082904752,
"loss": 0.055,
"step": 220000
},
{
"epoch": 22.58263190309998,
"eval_cer": 0.016735781990521326,
"eval_loss": 0.03310966119170189,
"eval_runtime": 22.4023,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 220000
},
{
"epoch": 22.685280229932253,
"grad_norm": 0.24312840402126312,
"learning_rate": 0.00027370911335116235,
"loss": 0.0541,
"step": 221000
},
{
"epoch": 22.685280229932253,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.03354882076382637,
"eval_runtime": 22.1565,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 221000
},
{
"epoch": 22.787928556764523,
"grad_norm": 0.31420522928237915,
"learning_rate": 0.00027268051841184944,
"loss": 0.0541,
"step": 222000
},
{
"epoch": 22.787928556764523,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.03195945546030998,
"eval_runtime": 22.1178,
"eval_samples_per_second": 4.069,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 222000
},
{
"epoch": 22.890576883596797,
"grad_norm": 0.5160701870918274,
"learning_rate": 0.00027165192347253654,
"loss": 0.0544,
"step": 223000
},
{
"epoch": 22.890576883596797,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.0328957661986351,
"eval_runtime": 22.2935,
"eval_samples_per_second": 4.037,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 223000
},
{
"epoch": 22.99322521042907,
"grad_norm": 0.18564113974571228,
"learning_rate": 0.00027062332853322363,
"loss": 0.0538,
"step": 224000
},
{
"epoch": 22.99322521042907,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.033330611884593964,
"eval_runtime": 22.2627,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 224000
},
{
"epoch": 23.095873537261344,
"grad_norm": 0.19497232139110565,
"learning_rate": 0.0002695947335939107,
"loss": 0.0527,
"step": 225000
},
{
"epoch": 23.095873537261344,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.03324893116950989,
"eval_runtime": 22.2973,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 225000
},
{
"epoch": 23.198521864093614,
"grad_norm": 0.24956081807613373,
"learning_rate": 0.0002685661386545978,
"loss": 0.0529,
"step": 226000
},
{
"epoch": 23.198521864093614,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03351821005344391,
"eval_runtime": 22.2956,
"eval_samples_per_second": 4.037,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 226000
},
{
"epoch": 23.301170190925887,
"grad_norm": 0.1977517157793045,
"learning_rate": 0.00026753754371528496,
"loss": 0.0532,
"step": 227000
},
{
"epoch": 23.301170190925887,
"eval_cer": 0.017920616113744077,
"eval_loss": 0.03354490175843239,
"eval_runtime": 22.2726,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 227000
},
{
"epoch": 23.40381851775816,
"grad_norm": 0.23078420758247375,
"learning_rate": 0.00026650894877597206,
"loss": 0.0537,
"step": 228000
},
{
"epoch": 23.40381851775816,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.03308222442865372,
"eval_runtime": 22.3431,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 228000
},
{
"epoch": 23.506466844590435,
"grad_norm": 0.3596203029155731,
"learning_rate": 0.00026548035383665915,
"loss": 0.0534,
"step": 229000
},
{
"epoch": 23.506466844590435,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.03161655366420746,
"eval_runtime": 22.3127,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 229000
},
{
"epoch": 23.609115171422705,
"grad_norm": 0.26955559849739075,
"learning_rate": 0.00026445175889734624,
"loss": 0.0538,
"step": 230000
},
{
"epoch": 23.609115171422705,
"eval_cer": 0.01940165876777251,
"eval_loss": 0.03247096389532089,
"eval_runtime": 22.3142,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 230000
},
{
"epoch": 23.711763498254978,
"grad_norm": 0.305859237909317,
"learning_rate": 0.00026342316395803334,
"loss": 0.0534,
"step": 231000
},
{
"epoch": 23.711763498254978,
"eval_cer": 0.017180094786729858,
"eval_loss": 0.03188026696443558,
"eval_runtime": 22.28,
"eval_samples_per_second": 4.039,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 231000
},
{
"epoch": 23.81441182508725,
"grad_norm": 0.414869099855423,
"learning_rate": 0.00026239456901872043,
"loss": 0.0534,
"step": 232000
},
{
"epoch": 23.81441182508725,
"eval_cer": 0.01940165876777251,
"eval_loss": 0.033250004053115845,
"eval_runtime": 22.3878,
"eval_samples_per_second": 4.02,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06613047363717604,
"step": 232000
},
{
"epoch": 23.917060151919525,
"grad_norm": 0.24189621210098267,
"learning_rate": 0.0002613659740794075,
"loss": 0.0537,
"step": 233000
},
{
"epoch": 23.917060151919525,
"eval_cer": 0.018216824644549764,
"eval_loss": 0.033227771520614624,
"eval_runtime": 22.1167,
"eval_samples_per_second": 4.069,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 233000
},
{
"epoch": 24.019708478751795,
"grad_norm": 0.19833236932754517,
"learning_rate": 0.00026033737914009467,
"loss": 0.0533,
"step": 234000
},
{
"epoch": 24.019708478751795,
"eval_cer": 0.017920616113744077,
"eval_loss": 0.03290673345327377,
"eval_runtime": 22.278,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 234000
},
{
"epoch": 24.12235680558407,
"grad_norm": 0.15947362780570984,
"learning_rate": 0.00025930878420078176,
"loss": 0.0523,
"step": 235000
},
{
"epoch": 24.12235680558407,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03324710577726364,
"eval_runtime": 22.2183,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 235000
},
{
"epoch": 24.225005132416342,
"grad_norm": 0.19958557188510895,
"learning_rate": 0.00025828018926146886,
"loss": 0.0522,
"step": 236000
},
{
"epoch": 24.225005132416342,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.031826525926589966,
"eval_runtime": 23.5228,
"eval_samples_per_second": 3.826,
"eval_steps_per_second": 0.043,
"eval_wer": 0.05451295799821269,
"step": 236000
},
{
"epoch": 24.327653459248616,
"grad_norm": 0.20118238031864166,
"learning_rate": 0.00025725159432215595,
"loss": 0.0518,
"step": 237000
},
{
"epoch": 24.327653459248616,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.03189582750201225,
"eval_runtime": 22.1576,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 237000
},
{
"epoch": 24.430301786080886,
"grad_norm": 0.2729335129261017,
"learning_rate": 0.00025622299938284304,
"loss": 0.0529,
"step": 238000
},
{
"epoch": 24.430301786080886,
"eval_cer": 0.017180094786729858,
"eval_loss": 0.03211754932999611,
"eval_runtime": 22.2144,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 238000
},
{
"epoch": 24.53295011291316,
"grad_norm": 0.18871107697486877,
"learning_rate": 0.00025519440444353014,
"loss": 0.0531,
"step": 239000
},
{
"epoch": 24.53295011291316,
"eval_cer": 0.01806872037914692,
"eval_loss": 0.031803932040929794,
"eval_runtime": 22.1636,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 239000
},
{
"epoch": 24.635598439745433,
"grad_norm": 0.22552721202373505,
"learning_rate": 0.00025416580950421723,
"loss": 0.053,
"step": 240000
},
{
"epoch": 24.635598439745433,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.03302132338285446,
"eval_runtime": 22.305,
"eval_samples_per_second": 4.035,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 240000
},
{
"epoch": 24.738246766577706,
"grad_norm": 0.3143594264984131,
"learning_rate": 0.0002531372145649044,
"loss": 0.0535,
"step": 241000
},
{
"epoch": 24.738246766577706,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.032325536012649536,
"eval_runtime": 22.2087,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 241000
},
{
"epoch": 24.840895093409976,
"grad_norm": 0.2053222805261612,
"learning_rate": 0.00025210861962559147,
"loss": 0.053,
"step": 242000
},
{
"epoch": 24.840895093409976,
"eval_cer": 0.01851303317535545,
"eval_loss": 0.03247794508934021,
"eval_runtime": 22.8241,
"eval_samples_per_second": 3.943,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06255585344057193,
"step": 242000
},
{
"epoch": 24.94354342024225,
"grad_norm": 0.2229388952255249,
"learning_rate": 0.00025108002468627856,
"loss": 0.0525,
"step": 243000
},
{
"epoch": 24.94354342024225,
"eval_cer": 0.020290284360189575,
"eval_loss": 0.032272905111312866,
"eval_runtime": 22.7477,
"eval_samples_per_second": 3.956,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06702412868632708,
"step": 243000
},
{
"epoch": 25.046191747074523,
"grad_norm": 0.21171027421951294,
"learning_rate": 0.00025005142974696566,
"loss": 0.052,
"step": 244000
},
{
"epoch": 25.046191747074523,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.03205866739153862,
"eval_runtime": 22.1495,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 244000
},
{
"epoch": 25.148840073906797,
"grad_norm": 0.28519150614738464,
"learning_rate": 0.00024902283480765275,
"loss": 0.0518,
"step": 245000
},
{
"epoch": 25.148840073906797,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.0315067283809185,
"eval_runtime": 22.1659,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 245000
},
{
"epoch": 25.251488400739067,
"grad_norm": 0.19159762561321259,
"learning_rate": 0.00024799423986833984,
"loss": 0.0519,
"step": 246000
},
{
"epoch": 25.251488400739067,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.03155896067619324,
"eval_runtime": 22.1685,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 246000
},
{
"epoch": 25.35413672757134,
"grad_norm": 0.2302646040916443,
"learning_rate": 0.00024696564492902694,
"loss": 0.0521,
"step": 247000
},
{
"epoch": 25.35413672757134,
"eval_cer": 0.018364928909952605,
"eval_loss": 0.032100409269332886,
"eval_runtime": 22.1736,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 247000
},
{
"epoch": 25.456785054403614,
"grad_norm": 0.2624431848526001,
"learning_rate": 0.0002459370499897141,
"loss": 0.0525,
"step": 248000
},
{
"epoch": 25.456785054403614,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.03178829327225685,
"eval_runtime": 22.2239,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 248000
},
{
"epoch": 25.559433381235884,
"grad_norm": 0.24456505477428436,
"learning_rate": 0.0002449084550504012,
"loss": 0.052,
"step": 249000
},
{
"epoch": 25.559433381235884,
"eval_cer": 0.01851303317535545,
"eval_loss": 0.030579831451177597,
"eval_runtime": 22.261,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 249000
},
{
"epoch": 25.662081708068158,
"grad_norm": 0.1688804030418396,
"learning_rate": 0.00024387986011108827,
"loss": 0.0518,
"step": 250000
},
{
"epoch": 25.662081708068158,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.033347100019454956,
"eval_runtime": 22.153,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 250000
},
{
"epoch": 25.76473003490043,
"grad_norm": 0.1677083969116211,
"learning_rate": 0.00024285126517177536,
"loss": 0.0524,
"step": 251000
},
{
"epoch": 25.76473003490043,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.031150901690125465,
"eval_runtime": 22.137,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 251000
},
{
"epoch": 25.867378361732705,
"grad_norm": 0.28190451860427856,
"learning_rate": 0.00024182267023246246,
"loss": 0.0525,
"step": 252000
},
{
"epoch": 25.867378361732705,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.030852019786834717,
"eval_runtime": 22.2042,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 252000
},
{
"epoch": 25.970026688564978,
"grad_norm": 0.30484601855278015,
"learning_rate": 0.00024079407529314955,
"loss": 0.0525,
"step": 253000
},
{
"epoch": 25.970026688564978,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.03129878640174866,
"eval_runtime": 22.1837,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 253000
},
{
"epoch": 26.072675015397248,
"grad_norm": 0.23301398754119873,
"learning_rate": 0.00023976548035383667,
"loss": 0.0511,
"step": 254000
},
{
"epoch": 26.072675015397248,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.03187458962202072,
"eval_runtime": 22.2688,
"eval_samples_per_second": 4.042,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 254000
},
{
"epoch": 26.17532334222952,
"grad_norm": 0.2110058218240738,
"learning_rate": 0.00023873688541452376,
"loss": 0.051,
"step": 255000
},
{
"epoch": 26.17532334222952,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.03192685917019844,
"eval_runtime": 22.1497,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 255000
},
{
"epoch": 26.277971669061795,
"grad_norm": 0.7681686282157898,
"learning_rate": 0.00023770829047521086,
"loss": 0.0512,
"step": 256000
},
{
"epoch": 26.277971669061795,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.03258216753602028,
"eval_runtime": 22.2138,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 256000
},
{
"epoch": 26.380619995894065,
"grad_norm": 0.20167267322540283,
"learning_rate": 0.00023667969553589798,
"loss": 0.0511,
"step": 257000
},
{
"epoch": 26.380619995894065,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.03150052949786186,
"eval_runtime": 22.1067,
"eval_samples_per_second": 4.071,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 257000
},
{
"epoch": 26.48326832272634,
"grad_norm": 0.2660065293312073,
"learning_rate": 0.00023565110059658507,
"loss": 0.0514,
"step": 258000
},
{
"epoch": 26.48326832272634,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.031788378953933716,
"eval_runtime": 22.0549,
"eval_samples_per_second": 4.081,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 258000
},
{
"epoch": 26.585916649558612,
"grad_norm": 0.18703380227088928,
"learning_rate": 0.00023462250565727216,
"loss": 0.0516,
"step": 259000
},
{
"epoch": 26.585916649558612,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.03131980076432228,
"eval_runtime": 22.1633,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 259000
},
{
"epoch": 26.688564976390886,
"grad_norm": 0.25654709339141846,
"learning_rate": 0.00023359391071795926,
"loss": 0.0517,
"step": 260000
},
{
"epoch": 26.688564976390886,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.031445086002349854,
"eval_runtime": 22.3924,
"eval_samples_per_second": 4.019,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 260000
},
{
"epoch": 26.791213303223156,
"grad_norm": 0.2620410919189453,
"learning_rate": 0.00023256531577864638,
"loss": 0.052,
"step": 261000
},
{
"epoch": 26.791213303223156,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.03067016415297985,
"eval_runtime": 22.1951,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 261000
},
{
"epoch": 26.89386163005543,
"grad_norm": 0.2175012230873108,
"learning_rate": 0.00023153672083933347,
"loss": 0.0519,
"step": 262000
},
{
"epoch": 26.89386163005543,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.031155884265899658,
"eval_runtime": 22.1754,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 262000
},
{
"epoch": 26.996509956887703,
"grad_norm": 0.22425027191638947,
"learning_rate": 0.00023050812590002056,
"loss": 0.052,
"step": 263000
},
{
"epoch": 26.996509956887703,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.030132591724395752,
"eval_runtime": 22.1695,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 263000
},
{
"epoch": 27.099158283719976,
"grad_norm": 0.24267776310443878,
"learning_rate": 0.00022947953096070768,
"loss": 0.0506,
"step": 264000
},
{
"epoch": 27.099158283719976,
"eval_cer": 0.0173281990521327,
"eval_loss": 0.030183136463165283,
"eval_runtime": 22.1929,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 264000
},
{
"epoch": 27.201806610552246,
"grad_norm": 0.22535988688468933,
"learning_rate": 0.00022845093602139478,
"loss": 0.051,
"step": 265000
},
{
"epoch": 27.201806610552246,
"eval_cer": 0.015550947867298577,
"eval_loss": 0.029872052371501923,
"eval_runtime": 21.6648,
"eval_samples_per_second": 4.154,
"eval_steps_per_second": 0.046,
"eval_wer": 0.05272564789991063,
"step": 265000
},
{
"epoch": 27.30445493738452,
"grad_norm": 0.2646799385547638,
"learning_rate": 0.00022742234108208187,
"loss": 0.0511,
"step": 266000
},
{
"epoch": 27.30445493738452,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.030452899634838104,
"eval_runtime": 21.9979,
"eval_samples_per_second": 4.091,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 266000
},
{
"epoch": 27.407103264216794,
"grad_norm": 0.1818641871213913,
"learning_rate": 0.00022639374614276896,
"loss": 0.0507,
"step": 267000
},
{
"epoch": 27.407103264216794,
"eval_cer": 0.01806872037914692,
"eval_loss": 0.030228691175580025,
"eval_runtime": 22.1513,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 267000
},
{
"epoch": 27.509751591049067,
"grad_norm": 0.2914126515388489,
"learning_rate": 0.00022536515120345608,
"loss": 0.0506,
"step": 268000
},
{
"epoch": 27.509751591049067,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.030619405210018158,
"eval_runtime": 22.1314,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 268000
},
{
"epoch": 27.612399917881337,
"grad_norm": 0.19746644794940948,
"learning_rate": 0.00022433655626414317,
"loss": 0.0508,
"step": 269000
},
{
"epoch": 27.612399917881337,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.029848448932170868,
"eval_runtime": 22.0615,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 269000
},
{
"epoch": 27.71504824471361,
"grad_norm": 0.171453595161438,
"learning_rate": 0.00022330796132483027,
"loss": 0.0506,
"step": 270000
},
{
"epoch": 27.71504824471361,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.029385404661297798,
"eval_runtime": 22.1939,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 270000
},
{
"epoch": 27.817696571545884,
"grad_norm": 0.20320715010166168,
"learning_rate": 0.0002222793663855174,
"loss": 0.0513,
"step": 271000
},
{
"epoch": 27.817696571545884,
"eval_cer": 0.016735781990521326,
"eval_loss": 0.03053821623325348,
"eval_runtime": 22.1393,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 271000
},
{
"epoch": 27.920344898378158,
"grad_norm": 0.18147552013397217,
"learning_rate": 0.00022125077144620448,
"loss": 0.051,
"step": 272000
},
{
"epoch": 27.920344898378158,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.030883438885211945,
"eval_runtime": 22.1075,
"eval_samples_per_second": 4.071,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 272000
},
{
"epoch": 28.022993225210428,
"grad_norm": 0.3934711813926697,
"learning_rate": 0.00022022217650689157,
"loss": 0.0505,
"step": 273000
},
{
"epoch": 28.022993225210428,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.030014000833034515,
"eval_runtime": 21.9105,
"eval_samples_per_second": 4.108,
"eval_steps_per_second": 0.046,
"eval_wer": 0.05451295799821269,
"step": 273000
},
{
"epoch": 28.1256415520427,
"grad_norm": 0.18721525371074677,
"learning_rate": 0.0002191935815675787,
"loss": 0.0495,
"step": 274000
},
{
"epoch": 28.1256415520427,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.029803840443491936,
"eval_runtime": 22.2218,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 274000
},
{
"epoch": 28.228289878874975,
"grad_norm": 0.2836057245731354,
"learning_rate": 0.0002181649866282658,
"loss": 0.0499,
"step": 275000
},
{
"epoch": 28.228289878874975,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.02985943853855133,
"eval_runtime": 22.1574,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 275000
},
{
"epoch": 28.33093820570725,
"grad_norm": 0.16741269826889038,
"learning_rate": 0.00021713639168895288,
"loss": 0.0503,
"step": 276000
},
{
"epoch": 28.33093820570725,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.02978348545730114,
"eval_runtime": 22.2061,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 276000
},
{
"epoch": 28.43358653253952,
"grad_norm": 0.5261670351028442,
"learning_rate": 0.00021610779674963997,
"loss": 0.0505,
"step": 277000
},
{
"epoch": 28.43358653253952,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.02990272268652916,
"eval_runtime": 22.1117,
"eval_samples_per_second": 4.07,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 277000
},
{
"epoch": 28.536234859371792,
"grad_norm": 0.18356911838054657,
"learning_rate": 0.0002150792018103271,
"loss": 0.0502,
"step": 278000
},
{
"epoch": 28.536234859371792,
"eval_cer": 0.017476303317535545,
"eval_loss": 0.0308319590985775,
"eval_runtime": 22.2031,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 278000
},
{
"epoch": 28.638883186204065,
"grad_norm": 0.2313164621591568,
"learning_rate": 0.0002140506068710142,
"loss": 0.0506,
"step": 279000
},
{
"epoch": 28.638883186204065,
"eval_cer": 0.016735781990521326,
"eval_loss": 0.03133101388812065,
"eval_runtime": 22.2245,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 279000
},
{
"epoch": 28.74153151303634,
"grad_norm": 0.23044399917125702,
"learning_rate": 0.00021302201193170128,
"loss": 0.0511,
"step": 280000
},
{
"epoch": 28.74153151303634,
"eval_cer": 0.01762440758293839,
"eval_loss": 0.0300216656178236,
"eval_runtime": 22.3006,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 280000
},
{
"epoch": 28.84417983986861,
"grad_norm": 0.35165879130363464,
"learning_rate": 0.0002119934169923884,
"loss": 0.051,
"step": 281000
},
{
"epoch": 28.84417983986861,
"eval_cer": 0.0173281990521327,
"eval_loss": 0.029326628893613815,
"eval_runtime": 22.3265,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 281000
},
{
"epoch": 28.946828166700882,
"grad_norm": 0.276239275932312,
"learning_rate": 0.00021096482205307552,
"loss": 0.0499,
"step": 282000
},
{
"epoch": 28.946828166700882,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.02999110147356987,
"eval_runtime": 22.3215,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 282000
},
{
"epoch": 29.049476493533156,
"grad_norm": 0.2091301679611206,
"learning_rate": 0.00020993622711376261,
"loss": 0.0494,
"step": 283000
},
{
"epoch": 29.049476493533156,
"eval_cer": 0.016735781990521326,
"eval_loss": 0.029503343626856804,
"eval_runtime": 22.2699,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 283000
},
{
"epoch": 29.15212482036543,
"grad_norm": 0.3076883852481842,
"learning_rate": 0.0002089076321744497,
"loss": 0.0493,
"step": 284000
},
{
"epoch": 29.15212482036543,
"eval_cer": 0.01806872037914692,
"eval_loss": 0.02992323227226734,
"eval_runtime": 22.4692,
"eval_samples_per_second": 4.005,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 284000
},
{
"epoch": 29.2547731471977,
"grad_norm": 0.2164120078086853,
"learning_rate": 0.00020787903723513683,
"loss": 0.0492,
"step": 285000
},
{
"epoch": 29.2547731471977,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.029783058911561966,
"eval_runtime": 22.2084,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 285000
},
{
"epoch": 29.357421474029973,
"grad_norm": 0.25016504526138306,
"learning_rate": 0.00020685044229582392,
"loss": 0.0497,
"step": 286000
},
{
"epoch": 29.357421474029973,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.029911022633314133,
"eval_runtime": 22.0994,
"eval_samples_per_second": 4.073,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05987488829311886,
"step": 286000
},
{
"epoch": 29.460069800862247,
"grad_norm": 0.2327207326889038,
"learning_rate": 0.00020582184735651101,
"loss": 0.0496,
"step": 287000
},
{
"epoch": 29.460069800862247,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.028591720387339592,
"eval_runtime": 22.1807,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 287000
},
{
"epoch": 29.56271812769452,
"grad_norm": 0.25889137387275696,
"learning_rate": 0.00020479325241719813,
"loss": 0.0497,
"step": 288000
},
{
"epoch": 29.56271812769452,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.02914293482899666,
"eval_runtime": 22.2487,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 288000
},
{
"epoch": 29.66536645452679,
"grad_norm": 0.3326428532600403,
"learning_rate": 0.00020376465747788523,
"loss": 0.0499,
"step": 289000
},
{
"epoch": 29.66536645452679,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.028966935351490974,
"eval_runtime": 22.2836,
"eval_samples_per_second": 4.039,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 289000
},
{
"epoch": 29.768014781359064,
"grad_norm": 0.2747463583946228,
"learning_rate": 0.00020273606253857232,
"loss": 0.0498,
"step": 290000
},
{
"epoch": 29.768014781359064,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.027468033134937286,
"eval_runtime": 22.3116,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 290000
},
{
"epoch": 29.870663108191337,
"grad_norm": 0.31372368335723877,
"learning_rate": 0.00020170746759925941,
"loss": 0.0495,
"step": 291000
},
{
"epoch": 29.870663108191337,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.029326878488063812,
"eval_runtime": 22.3423,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 291000
},
{
"epoch": 29.97331143502361,
"grad_norm": 0.18704882264137268,
"learning_rate": 0.00020067887265994653,
"loss": 0.0499,
"step": 292000
},
{
"epoch": 29.97331143502361,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.028479211032390594,
"eval_runtime": 22.4625,
"eval_samples_per_second": 4.007,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 292000
},
{
"epoch": 30.07595976185588,
"grad_norm": 0.1708535999059677,
"learning_rate": 0.00019965027772063363,
"loss": 0.0484,
"step": 293000
},
{
"epoch": 30.07595976185588,
"eval_cer": 0.013921800947867298,
"eval_loss": 0.028360920026898384,
"eval_runtime": 22.3021,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 293000
},
{
"epoch": 30.178608088688154,
"grad_norm": 0.24666380882263184,
"learning_rate": 0.00019862168278132072,
"loss": 0.0489,
"step": 294000
},
{
"epoch": 30.178608088688154,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.028621409088373184,
"eval_runtime": 22.2968,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 294000
},
{
"epoch": 30.281256415520428,
"grad_norm": 0.2784259021282196,
"learning_rate": 0.00019759308784200784,
"loss": 0.0493,
"step": 295000
},
{
"epoch": 30.281256415520428,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.028690271079540253,
"eval_runtime": 22.252,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 295000
},
{
"epoch": 30.3839047423527,
"grad_norm": 0.2628447711467743,
"learning_rate": 0.00019656449290269493,
"loss": 0.049,
"step": 296000
},
{
"epoch": 30.3839047423527,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.028396843001246452,
"eval_runtime": 22.2552,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 296000
},
{
"epoch": 30.48655306918497,
"grad_norm": 0.23084846138954163,
"learning_rate": 0.00019553589796338203,
"loss": 0.049,
"step": 297000
},
{
"epoch": 30.48655306918497,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.02870321460068226,
"eval_runtime": 22.2205,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 297000
},
{
"epoch": 30.589201396017245,
"grad_norm": 0.17921754717826843,
"learning_rate": 0.00019450730302406912,
"loss": 0.0491,
"step": 298000
},
{
"epoch": 30.589201396017245,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.028362760320305824,
"eval_runtime": 22.2536,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 298000
},
{
"epoch": 30.69184972284952,
"grad_norm": 0.20431461930274963,
"learning_rate": 0.00019347870808475624,
"loss": 0.0493,
"step": 299000
},
{
"epoch": 30.69184972284952,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.027963554486632347,
"eval_runtime": 22.2764,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 299000
},
{
"epoch": 30.794498049681792,
"grad_norm": 0.3255954384803772,
"learning_rate": 0.00019245011314544333,
"loss": 0.049,
"step": 300000
},
{
"epoch": 30.794498049681792,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.028848888352513313,
"eval_runtime": 22.3367,
"eval_samples_per_second": 4.029,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 300000
},
{
"epoch": 30.897146376514062,
"grad_norm": 0.30805402994155884,
"learning_rate": 0.00019142151820613043,
"loss": 0.0493,
"step": 301000
},
{
"epoch": 30.897146376514062,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.029449112713336945,
"eval_runtime": 22.2797,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 301000
},
{
"epoch": 30.999794703346335,
"grad_norm": 0.2879369854927063,
"learning_rate": 0.00019039292326681755,
"loss": 0.0491,
"step": 302000
},
{
"epoch": 30.999794703346335,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.028681093826889992,
"eval_runtime": 22.3072,
"eval_samples_per_second": 4.035,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 302000
},
{
"epoch": 31.10244303017861,
"grad_norm": 0.30376702547073364,
"learning_rate": 0.00018936432832750464,
"loss": 0.0479,
"step": 303000
},
{
"epoch": 31.10244303017861,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.028124256059527397,
"eval_runtime": 22.255,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 303000
},
{
"epoch": 31.20509135701088,
"grad_norm": 0.325859934091568,
"learning_rate": 0.00018833573338819173,
"loss": 0.0485,
"step": 304000
},
{
"epoch": 31.20509135701088,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.028593741357326508,
"eval_runtime": 22.3606,
"eval_samples_per_second": 4.025,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 304000
},
{
"epoch": 31.307739683843153,
"grad_norm": 0.26860424876213074,
"learning_rate": 0.00018730713844887883,
"loss": 0.0481,
"step": 305000
},
{
"epoch": 31.307739683843153,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.028675682842731476,
"eval_runtime": 22.1875,
"eval_samples_per_second": 4.056,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 305000
},
{
"epoch": 31.410388010675426,
"grad_norm": 0.36149585247039795,
"learning_rate": 0.00018627854350956595,
"loss": 0.0488,
"step": 306000
},
{
"epoch": 31.410388010675426,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.029292631894350052,
"eval_runtime": 22.1759,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 306000
},
{
"epoch": 31.5130363375077,
"grad_norm": 0.23009520769119263,
"learning_rate": 0.00018524994857025304,
"loss": 0.0487,
"step": 307000
},
{
"epoch": 31.5130363375077,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.02981569990515709,
"eval_runtime": 22.1972,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 307000
},
{
"epoch": 31.61568466433997,
"grad_norm": 0.2854170799255371,
"learning_rate": 0.00018422135363094013,
"loss": 0.0485,
"step": 308000
},
{
"epoch": 31.61568466433997,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.030133897438645363,
"eval_runtime": 22.3301,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 308000
},
{
"epoch": 31.718332991172243,
"grad_norm": 0.20701636373996735,
"learning_rate": 0.00018319275869162725,
"loss": 0.0488,
"step": 309000
},
{
"epoch": 31.718332991172243,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.02945251390337944,
"eval_runtime": 22.1144,
"eval_samples_per_second": 4.07,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 309000
},
{
"epoch": 31.820981318004517,
"grad_norm": 0.16278359293937683,
"learning_rate": 0.00018216416375231435,
"loss": 0.0489,
"step": 310000
},
{
"epoch": 31.820981318004517,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.02939271740615368,
"eval_runtime": 22.2354,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 310000
},
{
"epoch": 31.92362964483679,
"grad_norm": 0.20761480927467346,
"learning_rate": 0.00018113556881300144,
"loss": 0.049,
"step": 311000
},
{
"epoch": 31.92362964483679,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.028488388285040855,
"eval_runtime": 22.1425,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 311000
},
{
"epoch": 32.02627797166906,
"grad_norm": 0.23206296563148499,
"learning_rate": 0.00018010697387368853,
"loss": 0.0487,
"step": 312000
},
{
"epoch": 32.02627797166906,
"eval_cer": 0.017180094786729858,
"eval_loss": 0.028869740664958954,
"eval_runtime": 22.3356,
"eval_samples_per_second": 4.029,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 312000
},
{
"epoch": 32.128926298501334,
"grad_norm": 0.3341107964515686,
"learning_rate": 0.00017907837893437565,
"loss": 0.0481,
"step": 313000
},
{
"epoch": 32.128926298501334,
"eval_cer": 0.018216824644549764,
"eval_loss": 0.029275845736265182,
"eval_runtime": 22.1486,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 313000
},
{
"epoch": 32.23157462533361,
"grad_norm": 0.240467831492424,
"learning_rate": 0.00017804978399506275,
"loss": 0.0481,
"step": 314000
},
{
"epoch": 32.23157462533361,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.02915882132947445,
"eval_runtime": 22.1429,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 314000
},
{
"epoch": 32.33422295216588,
"grad_norm": 0.24573862552642822,
"learning_rate": 0.00017702118905574984,
"loss": 0.0473,
"step": 315000
},
{
"epoch": 32.33422295216588,
"eval_cer": 0.018364928909952605,
"eval_loss": 0.02838301472365856,
"eval_runtime": 22.1497,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 315000
},
{
"epoch": 32.436871278998154,
"grad_norm": 0.3885030746459961,
"learning_rate": 0.00017599259411643696,
"loss": 0.0486,
"step": 316000
},
{
"epoch": 32.436871278998154,
"eval_cer": 0.01762440758293839,
"eval_loss": 0.02944045141339302,
"eval_runtime": 22.1602,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 316000
},
{
"epoch": 32.53951960583043,
"grad_norm": 0.31944355368614197,
"learning_rate": 0.00017496399917712405,
"loss": 0.0483,
"step": 317000
},
{
"epoch": 32.53951960583043,
"eval_cer": 0.0173281990521327,
"eval_loss": 0.028936417773365974,
"eval_runtime": 21.9567,
"eval_samples_per_second": 4.099,
"eval_steps_per_second": 0.046,
"eval_wer": 0.05630026809651475,
"step": 317000
},
{
"epoch": 32.642167932662694,
"grad_norm": 0.2156781703233719,
"learning_rate": 0.00017393540423781115,
"loss": 0.0477,
"step": 318000
},
{
"epoch": 32.642167932662694,
"eval_cer": 0.018661137440758292,
"eval_loss": 0.029529759660363197,
"eval_runtime": 22.0507,
"eval_samples_per_second": 4.082,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06076854334226988,
"step": 318000
},
{
"epoch": 32.74481625949497,
"grad_norm": 0.23771491646766663,
"learning_rate": 0.00017290680929849824,
"loss": 0.048,
"step": 319000
},
{
"epoch": 32.74481625949497,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.029350074008107185,
"eval_runtime": 22.1715,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 319000
},
{
"epoch": 32.84746458632724,
"grad_norm": 0.4010453224182129,
"learning_rate": 0.00017187821435918536,
"loss": 0.0482,
"step": 320000
},
{
"epoch": 32.84746458632724,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.0294723492115736,
"eval_runtime": 22.2642,
"eval_samples_per_second": 4.042,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 320000
},
{
"epoch": 32.950112913159515,
"grad_norm": 0.21573138236999512,
"learning_rate": 0.00017084961941987245,
"loss": 0.0478,
"step": 321000
},
{
"epoch": 32.950112913159515,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.02849040925502777,
"eval_runtime": 22.3919,
"eval_samples_per_second": 4.019,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 321000
},
{
"epoch": 33.05276123999179,
"grad_norm": 0.2565110921859741,
"learning_rate": 0.00016982102448055955,
"loss": 0.0476,
"step": 322000
},
{
"epoch": 33.05276123999179,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.028644192963838577,
"eval_runtime": 22.4917,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05361930294906166,
"step": 322000
},
{
"epoch": 33.15540956682406,
"grad_norm": 0.17566250264644623,
"learning_rate": 0.00016879242954124667,
"loss": 0.0465,
"step": 323000
},
{
"epoch": 33.15540956682406,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.028031960129737854,
"eval_runtime": 22.2193,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 323000
},
{
"epoch": 33.258057893656336,
"grad_norm": 0.21690410375595093,
"learning_rate": 0.00016776383460193376,
"loss": 0.0474,
"step": 324000
},
{
"epoch": 33.258057893656336,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.028126152232289314,
"eval_runtime": 22.2514,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 324000
},
{
"epoch": 33.36070622048861,
"grad_norm": 0.17805682122707367,
"learning_rate": 0.00016673523966262085,
"loss": 0.047,
"step": 325000
},
{
"epoch": 33.36070622048861,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.028366120532155037,
"eval_runtime": 22.1824,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 325000
},
{
"epoch": 33.463354547320876,
"grad_norm": 0.23276859521865845,
"learning_rate": 0.00016570664472330795,
"loss": 0.0478,
"step": 326000
},
{
"epoch": 33.463354547320876,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.02805442176759243,
"eval_runtime": 22.0401,
"eval_samples_per_second": 4.083,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 326000
},
{
"epoch": 33.56600287415315,
"grad_norm": 0.23791708052158356,
"learning_rate": 0.00016467804978399507,
"loss": 0.0479,
"step": 327000
},
{
"epoch": 33.56600287415315,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.028071463108062744,
"eval_runtime": 22.166,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 327000
},
{
"epoch": 33.66865120098542,
"grad_norm": 0.22921526432037354,
"learning_rate": 0.00016364945484468216,
"loss": 0.0482,
"step": 328000
},
{
"epoch": 33.66865120098542,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.028569117188453674,
"eval_runtime": 22.3334,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 328000
},
{
"epoch": 33.771299527817696,
"grad_norm": 0.1470087319612503,
"learning_rate": 0.00016262085990536925,
"loss": 0.0477,
"step": 329000
},
{
"epoch": 33.771299527817696,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.028614189475774765,
"eval_runtime": 22.1814,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 329000
},
{
"epoch": 33.87394785464997,
"grad_norm": 0.4257276654243469,
"learning_rate": 0.00016159226496605637,
"loss": 0.0483,
"step": 330000
},
{
"epoch": 33.87394785464997,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.028485840186476707,
"eval_runtime": 22.174,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 330000
},
{
"epoch": 33.97659618148224,
"grad_norm": 0.4129483997821808,
"learning_rate": 0.00016056367002674347,
"loss": 0.0475,
"step": 331000
},
{
"epoch": 33.97659618148224,
"eval_cer": 0.01525473933649289,
"eval_loss": 0.028352849185466766,
"eval_runtime": 22.3086,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 331000
},
{
"epoch": 34.07924450831452,
"grad_norm": 0.2835679054260254,
"learning_rate": 0.00015953507508743056,
"loss": 0.0465,
"step": 332000
},
{
"epoch": 34.07924450831452,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.028890669345855713,
"eval_runtime": 22.1774,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 332000
},
{
"epoch": 34.18189283514679,
"grad_norm": 0.2247968167066574,
"learning_rate": 0.00015850648014811765,
"loss": 0.0469,
"step": 333000
},
{
"epoch": 34.18189283514679,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.028413381427526474,
"eval_runtime": 22.0968,
"eval_samples_per_second": 4.073,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 333000
},
{
"epoch": 34.28454116197906,
"grad_norm": 0.17359092831611633,
"learning_rate": 0.00015747788520880477,
"loss": 0.0468,
"step": 334000
},
{
"epoch": 34.28454116197906,
"eval_cer": 0.015550947867298577,
"eval_loss": 0.028406651690602303,
"eval_runtime": 22.1838,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 334000
},
{
"epoch": 34.38718948881133,
"grad_norm": 0.2905976176261902,
"learning_rate": 0.00015644929026949187,
"loss": 0.0467,
"step": 335000
},
{
"epoch": 34.38718948881133,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.028134917840361595,
"eval_runtime": 22.2384,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 335000
},
{
"epoch": 34.489837815643604,
"grad_norm": 0.41442832350730896,
"learning_rate": 0.00015542069533017896,
"loss": 0.0472,
"step": 336000
},
{
"epoch": 34.489837815643604,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.028330376371741295,
"eval_runtime": 22.2855,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 336000
},
{
"epoch": 34.59248614247588,
"grad_norm": 0.20874008536338806,
"learning_rate": 0.00015439210039086608,
"loss": 0.0472,
"step": 337000
},
{
"epoch": 34.59248614247588,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.028587637469172478,
"eval_runtime": 22.2778,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 337000
},
{
"epoch": 34.69513446930815,
"grad_norm": 0.28286242485046387,
"learning_rate": 0.00015336350545155317,
"loss": 0.047,
"step": 338000
},
{
"epoch": 34.69513446930815,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.028073778375983238,
"eval_runtime": 22.2502,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 338000
},
{
"epoch": 34.797782796140424,
"grad_norm": 0.2530520558357239,
"learning_rate": 0.00015233491051224027,
"loss": 0.0469,
"step": 339000
},
{
"epoch": 34.797782796140424,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.02780935913324356,
"eval_runtime": 22.1698,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 339000
},
{
"epoch": 34.9004311229727,
"grad_norm": 0.24278897047042847,
"learning_rate": 0.00015130631557292736,
"loss": 0.0474,
"step": 340000
},
{
"epoch": 34.9004311229727,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.027951352298259735,
"eval_runtime": 22.2193,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 340000
},
{
"epoch": 35.00307944980497,
"grad_norm": 0.23415499925613403,
"learning_rate": 0.00015027772063361448,
"loss": 0.0469,
"step": 341000
},
{
"epoch": 35.00307944980497,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.02772090956568718,
"eval_runtime": 22.09,
"eval_samples_per_second": 4.074,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 341000
},
{
"epoch": 35.10572777663724,
"grad_norm": 0.4553733468055725,
"learning_rate": 0.00014924912569430157,
"loss": 0.0458,
"step": 342000
},
{
"epoch": 35.10572777663724,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.027903633192181587,
"eval_runtime": 22.1864,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 342000
},
{
"epoch": 35.20837610346951,
"grad_norm": 0.1911894530057907,
"learning_rate": 0.00014822053075498867,
"loss": 0.0464,
"step": 343000
},
{
"epoch": 35.20837610346951,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.0284242145717144,
"eval_runtime": 22.2316,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 343000
},
{
"epoch": 35.311024430301785,
"grad_norm": 0.9561129212379456,
"learning_rate": 0.0001471919358156758,
"loss": 0.0463,
"step": 344000
},
{
"epoch": 35.311024430301785,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.02789863385260105,
"eval_runtime": 22.2194,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 344000
},
{
"epoch": 35.41367275713406,
"grad_norm": 0.2921608090400696,
"learning_rate": 0.0001461633408763629,
"loss": 0.0461,
"step": 345000
},
{
"epoch": 35.41367275713406,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.028261249884963036,
"eval_runtime": 22.0199,
"eval_samples_per_second": 4.087,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 345000
},
{
"epoch": 35.51632108396633,
"grad_norm": 0.19372744858264923,
"learning_rate": 0.00014513474593705,
"loss": 0.0463,
"step": 346000
},
{
"epoch": 35.51632108396633,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.028103064745664597,
"eval_runtime": 22.1334,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 346000
},
{
"epoch": 35.618969410798606,
"grad_norm": 0.27431151270866394,
"learning_rate": 0.00014410615099773712,
"loss": 0.0472,
"step": 347000
},
{
"epoch": 35.618969410798606,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.028425684198737144,
"eval_runtime": 22.139,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05630026809651475,
"step": 347000
},
{
"epoch": 35.72161773763088,
"grad_norm": 0.27128133177757263,
"learning_rate": 0.0001430775560584242,
"loss": 0.0466,
"step": 348000
},
{
"epoch": 35.72161773763088,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.02764798142015934,
"eval_runtime": 22.2733,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05451295799821269,
"step": 348000
},
{
"epoch": 35.824266064463146,
"grad_norm": 0.26995572447776794,
"learning_rate": 0.0001420489611191113,
"loss": 0.0467,
"step": 349000
},
{
"epoch": 35.824266064463146,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.027596063911914825,
"eval_runtime": 22.0467,
"eval_samples_per_second": 4.082,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 349000
},
{
"epoch": 35.92691439129542,
"grad_norm": 0.31239375472068787,
"learning_rate": 0.0001410203661797984,
"loss": 0.0469,
"step": 350000
},
{
"epoch": 35.92691439129542,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.028232304379343987,
"eval_runtime": 22.1495,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 350000
},
{
"epoch": 36.02956271812769,
"grad_norm": 0.344926655292511,
"learning_rate": 0.00013999177124048552,
"loss": 0.0464,
"step": 351000
},
{
"epoch": 36.02956271812769,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.026858482509851456,
"eval_runtime": 22.2106,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 351000
},
{
"epoch": 36.132211044959966,
"grad_norm": 0.1864170879125595,
"learning_rate": 0.0001389631763011726,
"loss": 0.0449,
"step": 352000
},
{
"epoch": 36.132211044959966,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.02711603231728077,
"eval_runtime": 22.2552,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 352000
},
{
"epoch": 36.23485937179224,
"grad_norm": 0.1876976191997528,
"learning_rate": 0.0001379345813618597,
"loss": 0.0466,
"step": 353000
},
{
"epoch": 36.23485937179224,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.027281379327178,
"eval_runtime": 22.2442,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 353000
},
{
"epoch": 36.33750769862451,
"grad_norm": 0.21546737849712372,
"learning_rate": 0.00013690598642254683,
"loss": 0.0458,
"step": 354000
},
{
"epoch": 36.33750769862451,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.02689436264336109,
"eval_runtime": 22.259,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 354000
},
{
"epoch": 36.44015602545679,
"grad_norm": 0.31887394189834595,
"learning_rate": 0.00013587739148323392,
"loss": 0.0459,
"step": 355000
},
{
"epoch": 36.44015602545679,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.027051741257309914,
"eval_runtime": 22.2336,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 355000
},
{
"epoch": 36.54280435228906,
"grad_norm": 0.24578991532325745,
"learning_rate": 0.000134848796543921,
"loss": 0.0469,
"step": 356000
},
{
"epoch": 36.54280435228906,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.026666434481739998,
"eval_runtime": 22.215,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 356000
},
{
"epoch": 36.64545267912133,
"grad_norm": 0.3394581377506256,
"learning_rate": 0.0001338202016046081,
"loss": 0.0458,
"step": 357000
},
{
"epoch": 36.64545267912133,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.02698771469295025,
"eval_runtime": 22.1744,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 357000
},
{
"epoch": 36.7481010059536,
"grad_norm": 0.2731720805168152,
"learning_rate": 0.00013279160666529523,
"loss": 0.0462,
"step": 358000
},
{
"epoch": 36.7481010059536,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.02696968987584114,
"eval_runtime": 22.1592,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 358000
},
{
"epoch": 36.850749332785874,
"grad_norm": 0.2410440891981125,
"learning_rate": 0.00013176301172598232,
"loss": 0.0466,
"step": 359000
},
{
"epoch": 36.850749332785874,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.027307961136102676,
"eval_runtime": 22.243,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 359000
},
{
"epoch": 36.95339765961815,
"grad_norm": 0.28017568588256836,
"learning_rate": 0.0001307344167866694,
"loss": 0.046,
"step": 360000
},
{
"epoch": 36.95339765961815,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.02654258720576763,
"eval_runtime": 22.4072,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 360000
},
{
"epoch": 37.05604598645042,
"grad_norm": 0.23387791216373444,
"learning_rate": 0.00012970582184735653,
"loss": 0.0456,
"step": 361000
},
{
"epoch": 37.05604598645042,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.026880960911512375,
"eval_runtime": 22.1133,
"eval_samples_per_second": 4.07,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 361000
},
{
"epoch": 37.158694313282695,
"grad_norm": 0.3157537579536438,
"learning_rate": 0.00012867722690804363,
"loss": 0.0452,
"step": 362000
},
{
"epoch": 37.158694313282695,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.026554275304079056,
"eval_runtime": 22.2184,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 362000
},
{
"epoch": 37.26134264011497,
"grad_norm": 0.301200807094574,
"learning_rate": 0.00012764863196873072,
"loss": 0.0456,
"step": 363000
},
{
"epoch": 37.26134264011497,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.02748226933181286,
"eval_runtime": 22.1594,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 363000
},
{
"epoch": 37.36399096694724,
"grad_norm": 0.1987874060869217,
"learning_rate": 0.0001266200370294178,
"loss": 0.0454,
"step": 364000
},
{
"epoch": 37.36399096694724,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.02689102478325367,
"eval_runtime": 22.289,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 364000
},
{
"epoch": 37.46663929377951,
"grad_norm": 0.2465968132019043,
"learning_rate": 0.00012559144209010493,
"loss": 0.0453,
"step": 365000
},
{
"epoch": 37.46663929377951,
"eval_cer": 0.012588862559241706,
"eval_loss": 0.026751089841127396,
"eval_runtime": 21.9569,
"eval_samples_per_second": 4.099,
"eval_steps_per_second": 0.046,
"eval_wer": 0.045576407506702415,
"step": 365000
},
{
"epoch": 37.56928762061178,
"grad_norm": 0.39610666036605835,
"learning_rate": 0.00012456284715079203,
"loss": 0.046,
"step": 366000
},
{
"epoch": 37.56928762061178,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.026830825954675674,
"eval_runtime": 22.2783,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 366000
},
{
"epoch": 37.671935947444055,
"grad_norm": 0.26581278443336487,
"learning_rate": 0.00012353425221147912,
"loss": 0.046,
"step": 367000
},
{
"epoch": 37.671935947444055,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.0267048142850399,
"eval_runtime": 22.1983,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 367000
},
{
"epoch": 37.77458427427633,
"grad_norm": 0.22986672818660736,
"learning_rate": 0.00012250565727216624,
"loss": 0.0459,
"step": 368000
},
{
"epoch": 37.77458427427633,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.026945000514388084,
"eval_runtime": 22.1927,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 368000
},
{
"epoch": 37.8772326011086,
"grad_norm": 0.2647237479686737,
"learning_rate": 0.00012147706233285333,
"loss": 0.0454,
"step": 369000
},
{
"epoch": 37.8772326011086,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.027104683220386505,
"eval_runtime": 22.1006,
"eval_samples_per_second": 4.072,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 369000
},
{
"epoch": 37.979880927940876,
"grad_norm": 0.2698921263217926,
"learning_rate": 0.00012044846739354043,
"loss": 0.0459,
"step": 370000
},
{
"epoch": 37.979880927940876,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.027364252135157585,
"eval_runtime": 22.1262,
"eval_samples_per_second": 4.068,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 370000
},
{
"epoch": 38.08252925477315,
"grad_norm": 0.2091304063796997,
"learning_rate": 0.00011941987245422753,
"loss": 0.0453,
"step": 371000
},
{
"epoch": 38.08252925477315,
"eval_cer": 0.013329383886255925,
"eval_loss": 0.026948757469654083,
"eval_runtime": 22.0181,
"eval_samples_per_second": 4.088,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 371000
},
{
"epoch": 38.18517758160542,
"grad_norm": 0.21031425893306732,
"learning_rate": 0.00011839127751491462,
"loss": 0.0446,
"step": 372000
},
{
"epoch": 38.18517758160542,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.027428090572357178,
"eval_runtime": 22.1193,
"eval_samples_per_second": 4.069,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 372000
},
{
"epoch": 38.28782590843769,
"grad_norm": 0.16268426179885864,
"learning_rate": 0.00011736268257560173,
"loss": 0.0452,
"step": 373000
},
{
"epoch": 38.28782590843769,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.0269022174179554,
"eval_runtime": 22.3714,
"eval_samples_per_second": 4.023,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 373000
},
{
"epoch": 38.39047423526996,
"grad_norm": 0.18285077810287476,
"learning_rate": 0.00011633408763628884,
"loss": 0.0456,
"step": 374000
},
{
"epoch": 38.39047423526996,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.026672353968024254,
"eval_runtime": 22.4441,
"eval_samples_per_second": 4.01,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 374000
},
{
"epoch": 38.493122562102236,
"grad_norm": 0.14922891557216644,
"learning_rate": 0.00011530549269697593,
"loss": 0.0449,
"step": 375000
},
{
"epoch": 38.493122562102236,
"eval_cer": 0.01525473933649289,
"eval_loss": 0.026156587526202202,
"eval_runtime": 22.2935,
"eval_samples_per_second": 4.037,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 375000
},
{
"epoch": 38.59577088893451,
"grad_norm": 0.2601664662361145,
"learning_rate": 0.00011427689775766304,
"loss": 0.0453,
"step": 376000
},
{
"epoch": 38.59577088893451,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.026526469737291336,
"eval_runtime": 22.4581,
"eval_samples_per_second": 4.007,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 376000
},
{
"epoch": 38.69841921576678,
"grad_norm": 0.23301970958709717,
"learning_rate": 0.00011324830281835013,
"loss": 0.0454,
"step": 377000
},
{
"epoch": 38.69841921576678,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.026089007034897804,
"eval_runtime": 22.0613,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 377000
},
{
"epoch": 38.80106754259906,
"grad_norm": 0.1848640739917755,
"learning_rate": 0.00011221970787903724,
"loss": 0.0455,
"step": 378000
},
{
"epoch": 38.80106754259906,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.02705644629895687,
"eval_runtime": 22.0628,
"eval_samples_per_second": 4.079,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 378000
},
{
"epoch": 38.90371586943133,
"grad_norm": 0.2265135794878006,
"learning_rate": 0.00011119111293972433,
"loss": 0.0449,
"step": 379000
},
{
"epoch": 38.90371586943133,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.026829397305846214,
"eval_runtime": 22.1974,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 379000
},
{
"epoch": 39.006364196263604,
"grad_norm": 0.18336538970470428,
"learning_rate": 0.00011016251800041144,
"loss": 0.045,
"step": 380000
},
{
"epoch": 39.006364196263604,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.02715076506137848,
"eval_runtime": 22.2325,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 380000
},
{
"epoch": 39.10901252309587,
"grad_norm": 0.2116203010082245,
"learning_rate": 0.00010913392306109854,
"loss": 0.0442,
"step": 381000
},
{
"epoch": 39.10901252309587,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.026725102216005325,
"eval_runtime": 22.0106,
"eval_samples_per_second": 4.089,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 381000
},
{
"epoch": 39.211660849928144,
"grad_norm": 0.1735929548740387,
"learning_rate": 0.00010810532812178564,
"loss": 0.0446,
"step": 382000
},
{
"epoch": 39.211660849928144,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.026632068678736687,
"eval_runtime": 22.2735,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 382000
},
{
"epoch": 39.31430917676042,
"grad_norm": 0.29244282841682434,
"learning_rate": 0.00010707673318247274,
"loss": 0.0443,
"step": 383000
},
{
"epoch": 39.31430917676042,
"eval_cer": 0.01525473933649289,
"eval_loss": 0.02649509161710739,
"eval_runtime": 22.223,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 383000
},
{
"epoch": 39.41695750359269,
"grad_norm": 0.21645478904247284,
"learning_rate": 0.00010604813824315984,
"loss": 0.0448,
"step": 384000
},
{
"epoch": 39.41695750359269,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.02655700594186783,
"eval_runtime": 22.1333,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 384000
},
{
"epoch": 39.519605830424965,
"grad_norm": 0.24354924261569977,
"learning_rate": 0.00010501954330384694,
"loss": 0.0447,
"step": 385000
},
{
"epoch": 39.519605830424965,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.026025088503956795,
"eval_runtime": 22.2171,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 385000
},
{
"epoch": 39.62225415725724,
"grad_norm": 0.15328273177146912,
"learning_rate": 0.00010399094836453404,
"loss": 0.0452,
"step": 386000
},
{
"epoch": 39.62225415725724,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.025482947006821632,
"eval_runtime": 22.319,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 386000
},
{
"epoch": 39.72490248408951,
"grad_norm": 0.25354963541030884,
"learning_rate": 0.00010296235342522114,
"loss": 0.0447,
"step": 387000
},
{
"epoch": 39.72490248408951,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.026158807799220085,
"eval_runtime": 22.2432,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 387000
},
{
"epoch": 39.827550810921785,
"grad_norm": 0.18456153571605682,
"learning_rate": 0.00010193375848590825,
"loss": 0.0452,
"step": 388000
},
{
"epoch": 39.827550810921785,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.026585763320326805,
"eval_runtime": 22.3419,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 388000
},
{
"epoch": 39.93019913775405,
"grad_norm": 1.1289212703704834,
"learning_rate": 0.00010090516354659534,
"loss": 0.0451,
"step": 389000
},
{
"epoch": 39.93019913775405,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.026965312659740448,
"eval_runtime": 22.2111,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 389000
},
{
"epoch": 40.032847464586325,
"grad_norm": 0.2642553448677063,
"learning_rate": 9.987656860728245e-05,
"loss": 0.0441,
"step": 390000
},
{
"epoch": 40.032847464586325,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.02652685157954693,
"eval_runtime": 21.9945,
"eval_samples_per_second": 4.092,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 390000
},
{
"epoch": 40.1354957914186,
"grad_norm": 0.2834232449531555,
"learning_rate": 9.884797366796956e-05,
"loss": 0.0441,
"step": 391000
},
{
"epoch": 40.1354957914186,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.026538578793406487,
"eval_runtime": 21.6963,
"eval_samples_per_second": 4.148,
"eval_steps_per_second": 0.046,
"eval_wer": 0.049151027703306524,
"step": 391000
},
{
"epoch": 40.23814411825087,
"grad_norm": 0.366251140832901,
"learning_rate": 9.781937872865666e-05,
"loss": 0.044,
"step": 392000
},
{
"epoch": 40.23814411825087,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.025871722027659416,
"eval_runtime": 21.9657,
"eval_samples_per_second": 4.097,
"eval_steps_per_second": 0.046,
"eval_wer": 0.050044682752457555,
"step": 392000
},
{
"epoch": 40.340792445083146,
"grad_norm": 0.1964322179555893,
"learning_rate": 9.679078378934376e-05,
"loss": 0.0442,
"step": 393000
},
{
"epoch": 40.340792445083146,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.025851983577013016,
"eval_runtime": 21.9412,
"eval_samples_per_second": 4.102,
"eval_steps_per_second": 0.046,
"eval_wer": 0.04736371760500447,
"step": 393000
},
{
"epoch": 40.44344077191542,
"grad_norm": 0.19335012137889862,
"learning_rate": 9.576218885003086e-05,
"loss": 0.0444,
"step": 394000
},
{
"epoch": 40.44344077191542,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.025898663327097893,
"eval_runtime": 22.0686,
"eval_samples_per_second": 4.078,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 394000
},
{
"epoch": 40.54608909874769,
"grad_norm": 0.2371419221162796,
"learning_rate": 9.473359391071797e-05,
"loss": 0.0448,
"step": 395000
},
{
"epoch": 40.54608909874769,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.02598469704389572,
"eval_runtime": 22.2182,
"eval_samples_per_second": 4.051,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 395000
},
{
"epoch": 40.64873742557997,
"grad_norm": 0.190704807639122,
"learning_rate": 9.370499897140506e-05,
"loss": 0.0446,
"step": 396000
},
{
"epoch": 40.64873742557997,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.025896675884723663,
"eval_runtime": 22.4373,
"eval_samples_per_second": 4.011,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 396000
},
{
"epoch": 40.75138575241223,
"grad_norm": 0.24071630835533142,
"learning_rate": 9.267640403209217e-05,
"loss": 0.0445,
"step": 397000
},
{
"epoch": 40.75138575241223,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.025766143575310707,
"eval_runtime": 22.1683,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 397000
},
{
"epoch": 40.85403407924451,
"grad_norm": 0.37343931198120117,
"learning_rate": 9.164780909277926e-05,
"loss": 0.044,
"step": 398000
},
{
"epoch": 40.85403407924451,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.025763213634490967,
"eval_runtime": 22.2275,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 398000
},
{
"epoch": 40.95668240607678,
"grad_norm": 0.2885558605194092,
"learning_rate": 9.061921415346637e-05,
"loss": 0.0446,
"step": 399000
},
{
"epoch": 40.95668240607678,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.02574954554438591,
"eval_runtime": 22.2623,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 399000
},
{
"epoch": 41.05933073290905,
"grad_norm": 0.2904144525527954,
"learning_rate": 8.959061921415348e-05,
"loss": 0.0437,
"step": 400000
},
{
"epoch": 41.05933073290905,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.026339180767536163,
"eval_runtime": 22.3982,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 400000
},
{
"epoch": 41.16197905974133,
"grad_norm": 0.47024524211883545,
"learning_rate": 8.856202427484057e-05,
"loss": 0.0441,
"step": 401000
},
{
"epoch": 41.16197905974133,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.026481064036488533,
"eval_runtime": 22.1309,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 401000
},
{
"epoch": 41.2646273865736,
"grad_norm": 0.20567986369132996,
"learning_rate": 8.753342933552768e-05,
"loss": 0.0438,
"step": 402000
},
{
"epoch": 41.2646273865736,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.026423340663313866,
"eval_runtime": 22.1647,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 402000
},
{
"epoch": 41.367275713405874,
"grad_norm": 0.25154054164886475,
"learning_rate": 8.650483439621477e-05,
"loss": 0.0439,
"step": 403000
},
{
"epoch": 41.367275713405874,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.025870798155665398,
"eval_runtime": 22.2814,
"eval_samples_per_second": 4.039,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 403000
},
{
"epoch": 41.46992404023814,
"grad_norm": 0.22818030416965485,
"learning_rate": 8.547623945690188e-05,
"loss": 0.0437,
"step": 404000
},
{
"epoch": 41.46992404023814,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.025692187249660492,
"eval_runtime": 22.2761,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 404000
},
{
"epoch": 41.572572367070414,
"grad_norm": 0.20878200232982635,
"learning_rate": 8.444764451758897e-05,
"loss": 0.044,
"step": 405000
},
{
"epoch": 41.572572367070414,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.025789031758904457,
"eval_runtime": 22.1041,
"eval_samples_per_second": 4.072,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 405000
},
{
"epoch": 41.67522069390269,
"grad_norm": 0.37076711654663086,
"learning_rate": 8.341904957827608e-05,
"loss": 0.0438,
"step": 406000
},
{
"epoch": 41.67522069390269,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.025606298819184303,
"eval_runtime": 22.1493,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 406000
},
{
"epoch": 41.77786902073496,
"grad_norm": 0.3799729347229004,
"learning_rate": 8.239045463896318e-05,
"loss": 0.0439,
"step": 407000
},
{
"epoch": 41.77786902073496,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.02529660426080227,
"eval_runtime": 22.1548,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 407000
},
{
"epoch": 41.880517347567235,
"grad_norm": 0.1976720541715622,
"learning_rate": 8.136185969965028e-05,
"loss": 0.0442,
"step": 408000
},
{
"epoch": 41.880517347567235,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.025364946573972702,
"eval_runtime": 22.2419,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 408000
},
{
"epoch": 41.98316567439951,
"grad_norm": 0.35871848464012146,
"learning_rate": 8.033326476033738e-05,
"loss": 0.0438,
"step": 409000
},
{
"epoch": 41.98316567439951,
"eval_cer": 0.016143364928909953,
"eval_loss": 0.02546422928571701,
"eval_runtime": 22.0995,
"eval_samples_per_second": 4.072,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 409000
},
{
"epoch": 42.08581400123178,
"grad_norm": 0.3228365480899811,
"learning_rate": 7.930466982102448e-05,
"loss": 0.0435,
"step": 410000
},
{
"epoch": 42.08581400123178,
"eval_cer": 0.013921800947867298,
"eval_loss": 0.025534870103001595,
"eval_runtime": 22.0953,
"eval_samples_per_second": 4.073,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 410000
},
{
"epoch": 42.188462328064055,
"grad_norm": 0.32005515694618225,
"learning_rate": 7.827607488171158e-05,
"loss": 0.0432,
"step": 411000
},
{
"epoch": 42.188462328064055,
"eval_cer": 0.01229265402843602,
"eval_loss": 0.024984827265143394,
"eval_runtime": 22.11,
"eval_samples_per_second": 4.071,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 411000
},
{
"epoch": 42.29111065489632,
"grad_norm": 0.2567862570285797,
"learning_rate": 7.724747994239868e-05,
"loss": 0.0435,
"step": 412000
},
{
"epoch": 42.29111065489632,
"eval_cer": 0.01525473933649289,
"eval_loss": 0.025618551298975945,
"eval_runtime": 22.2013,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05183199285075961,
"step": 412000
},
{
"epoch": 42.393758981728595,
"grad_norm": 0.39079025387763977,
"learning_rate": 7.621888500308578e-05,
"loss": 0.0434,
"step": 413000
},
{
"epoch": 42.393758981728595,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.025224734097719193,
"eval_runtime": 21.8997,
"eval_samples_per_second": 4.11,
"eval_steps_per_second": 0.046,
"eval_wer": 0.045576407506702415,
"step": 413000
},
{
"epoch": 42.49640730856087,
"grad_norm": 0.20112274587154388,
"learning_rate": 7.519029006377289e-05,
"loss": 0.0434,
"step": 414000
},
{
"epoch": 42.49640730856087,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.02557324431836605,
"eval_runtime": 22.1285,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 414000
},
{
"epoch": 42.59905563539314,
"grad_norm": 0.29187527298927307,
"learning_rate": 7.416169512445998e-05,
"loss": 0.0433,
"step": 415000
},
{
"epoch": 42.59905563539314,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.02564132958650589,
"eval_runtime": 22.0916,
"eval_samples_per_second": 4.074,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 415000
},
{
"epoch": 42.701703962225416,
"grad_norm": 0.20293624699115753,
"learning_rate": 7.313310018514709e-05,
"loss": 0.0437,
"step": 416000
},
{
"epoch": 42.701703962225416,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.025618135929107666,
"eval_runtime": 22.0252,
"eval_samples_per_second": 4.086,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 416000
},
{
"epoch": 42.80435228905769,
"grad_norm": 0.2777237296104431,
"learning_rate": 7.210450524583418e-05,
"loss": 0.0438,
"step": 417000
},
{
"epoch": 42.80435228905769,
"eval_cer": 0.01569905213270142,
"eval_loss": 0.025969378650188446,
"eval_runtime": 22.1694,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 417000
},
{
"epoch": 42.90700061588996,
"grad_norm": 0.29848456382751465,
"learning_rate": 7.107591030652129e-05,
"loss": 0.0436,
"step": 418000
},
{
"epoch": 42.90700061588996,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.02571621723473072,
"eval_runtime": 22.2799,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 418000
},
{
"epoch": 43.00964894272224,
"grad_norm": 0.21752919256687164,
"learning_rate": 7.004731536720838e-05,
"loss": 0.0435,
"step": 419000
},
{
"epoch": 43.00964894272224,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.025668691843748093,
"eval_runtime": 22.1447,
"eval_samples_per_second": 4.064,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 419000
},
{
"epoch": 43.1122972695545,
"grad_norm": 0.3110567033290863,
"learning_rate": 6.901872042789549e-05,
"loss": 0.0431,
"step": 420000
},
{
"epoch": 43.1122972695545,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.025269243866205215,
"eval_runtime": 22.5329,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.044,
"eval_wer": 0.045576407506702415,
"step": 420000
},
{
"epoch": 43.21494559638678,
"grad_norm": 0.26389312744140625,
"learning_rate": 6.79901254885826e-05,
"loss": 0.0429,
"step": 421000
},
{
"epoch": 43.21494559638678,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.025585921481251717,
"eval_runtime": 22.2351,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 421000
},
{
"epoch": 43.31759392321905,
"grad_norm": 0.2228713184595108,
"learning_rate": 6.69615305492697e-05,
"loss": 0.043,
"step": 422000
},
{
"epoch": 43.31759392321905,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.02543068863451481,
"eval_runtime": 22.1864,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 422000
},
{
"epoch": 43.420242250051324,
"grad_norm": 0.1402529925107956,
"learning_rate": 6.593293560995681e-05,
"loss": 0.0439,
"step": 423000
},
{
"epoch": 43.420242250051324,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.025465745478868484,
"eval_runtime": 22.3826,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 423000
},
{
"epoch": 43.5228905768836,
"grad_norm": 0.3183715045452118,
"learning_rate": 6.49043406706439e-05,
"loss": 0.0431,
"step": 424000
},
{
"epoch": 43.5228905768836,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.025318369269371033,
"eval_runtime": 22.2919,
"eval_samples_per_second": 4.037,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 424000
},
{
"epoch": 43.62553890371587,
"grad_norm": 0.29827529191970825,
"learning_rate": 6.387574573133101e-05,
"loss": 0.0434,
"step": 425000
},
{
"epoch": 43.62553890371587,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.025606686249375343,
"eval_runtime": 22.0625,
"eval_samples_per_second": 4.079,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 425000
},
{
"epoch": 43.728187230548144,
"grad_norm": 0.16081830859184265,
"learning_rate": 6.28471507920181e-05,
"loss": 0.0431,
"step": 426000
},
{
"epoch": 43.728187230548144,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.025782672688364983,
"eval_runtime": 22.1883,
"eval_samples_per_second": 4.056,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 426000
},
{
"epoch": 43.83083555738042,
"grad_norm": 0.19551779329776764,
"learning_rate": 6.181855585270521e-05,
"loss": 0.0437,
"step": 427000
},
{
"epoch": 43.83083555738042,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.025874827057123184,
"eval_runtime": 22.0445,
"eval_samples_per_second": 4.083,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 427000
},
{
"epoch": 43.933483884212684,
"grad_norm": 0.3405396044254303,
"learning_rate": 6.07899609133923e-05,
"loss": 0.0431,
"step": 428000
},
{
"epoch": 43.933483884212684,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.025233900174498558,
"eval_runtime": 22.1498,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 428000
},
{
"epoch": 44.03613221104496,
"grad_norm": 0.1871403306722641,
"learning_rate": 5.97613659740794e-05,
"loss": 0.0424,
"step": 429000
},
{
"epoch": 44.03613221104496,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.025231176987290382,
"eval_runtime": 22.1695,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 429000
},
{
"epoch": 44.13878053787723,
"grad_norm": 0.272748202085495,
"learning_rate": 5.873277103476652e-05,
"loss": 0.0425,
"step": 430000
},
{
"epoch": 44.13878053787723,
"eval_cer": 0.012885071090047393,
"eval_loss": 0.025549624115228653,
"eval_runtime": 22.1325,
"eval_samples_per_second": 4.066,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 430000
},
{
"epoch": 44.241428864709505,
"grad_norm": 0.15834620594978333,
"learning_rate": 5.7704176095453617e-05,
"loss": 0.0428,
"step": 431000
},
{
"epoch": 44.241428864709505,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.02536383457481861,
"eval_runtime": 22.0624,
"eval_samples_per_second": 4.079,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 431000
},
{
"epoch": 44.34407719154178,
"grad_norm": 0.21297834813594818,
"learning_rate": 5.6675581156140717e-05,
"loss": 0.0426,
"step": 432000
},
{
"epoch": 44.34407719154178,
"eval_cer": 0.013033175355450236,
"eval_loss": 0.025508729740977287,
"eval_runtime": 22.2302,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 432000
},
{
"epoch": 44.44672551837405,
"grad_norm": 0.3164765536785126,
"learning_rate": 5.5646986216827816e-05,
"loss": 0.0428,
"step": 433000
},
{
"epoch": 44.44672551837405,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.025067314505577087,
"eval_runtime": 22.074,
"eval_samples_per_second": 4.077,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 433000
},
{
"epoch": 44.549373845206325,
"grad_norm": 0.14805859327316284,
"learning_rate": 5.4618391277514916e-05,
"loss": 0.0435,
"step": 434000
},
{
"epoch": 44.549373845206325,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.02515277825295925,
"eval_runtime": 22.2077,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 434000
},
{
"epoch": 44.6520221720386,
"grad_norm": 0.19713029265403748,
"learning_rate": 5.3589796338202016e-05,
"loss": 0.0429,
"step": 435000
},
{
"epoch": 44.6520221720386,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.02521173469722271,
"eval_runtime": 22.2596,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 435000
},
{
"epoch": 44.754670498870865,
"grad_norm": 0.2486603856086731,
"learning_rate": 5.2561201398889116e-05,
"loss": 0.0428,
"step": 436000
},
{
"epoch": 44.754670498870865,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.025502758100628853,
"eval_runtime": 22.2902,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 436000
},
{
"epoch": 44.85731882570314,
"grad_norm": 0.29049795866012573,
"learning_rate": 5.153260645957622e-05,
"loss": 0.0425,
"step": 437000
},
{
"epoch": 44.85731882570314,
"eval_cer": 0.01525473933649289,
"eval_loss": 0.02529684267938137,
"eval_runtime": 22.181,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 437000
},
{
"epoch": 44.95996715253541,
"grad_norm": 0.2436273992061615,
"learning_rate": 5.050401152026332e-05,
"loss": 0.0428,
"step": 438000
},
{
"epoch": 44.95996715253541,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.024828782305121422,
"eval_runtime": 22.2395,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 438000
},
{
"epoch": 45.062615479367686,
"grad_norm": 0.24658174812793732,
"learning_rate": 4.947541658095042e-05,
"loss": 0.0424,
"step": 439000
},
{
"epoch": 45.062615479367686,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.025087928399443626,
"eval_runtime": 22.0216,
"eval_samples_per_second": 4.087,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 439000
},
{
"epoch": 45.16526380619996,
"grad_norm": 0.39545565843582153,
"learning_rate": 4.844682164163752e-05,
"loss": 0.0423,
"step": 440000
},
{
"epoch": 45.16526380619996,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.0248849056661129,
"eval_runtime": 22.0732,
"eval_samples_per_second": 4.077,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 440000
},
{
"epoch": 45.26791213303223,
"grad_norm": 0.3729030191898346,
"learning_rate": 4.741822670232462e-05,
"loss": 0.0421,
"step": 441000
},
{
"epoch": 45.26791213303223,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.024852894246578217,
"eval_runtime": 22.1136,
"eval_samples_per_second": 4.07,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 441000
},
{
"epoch": 45.37056045986451,
"grad_norm": 0.2696306109428406,
"learning_rate": 4.638963176301172e-05,
"loss": 0.0424,
"step": 442000
},
{
"epoch": 45.37056045986451,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.024875616654753685,
"eval_runtime": 22.2425,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 442000
},
{
"epoch": 45.47320878669678,
"grad_norm": 0.18253710865974426,
"learning_rate": 4.536103682369882e-05,
"loss": 0.0427,
"step": 443000
},
{
"epoch": 45.47320878669678,
"eval_cer": 0.013921800947867298,
"eval_loss": 0.02515345811843872,
"eval_runtime": 22.4087,
"eval_samples_per_second": 4.016,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 443000
},
{
"epoch": 45.57585711352905,
"grad_norm": 0.5241480469703674,
"learning_rate": 4.433244188438593e-05,
"loss": 0.0424,
"step": 444000
},
{
"epoch": 45.57585711352905,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.024990374222397804,
"eval_runtime": 22.5101,
"eval_samples_per_second": 3.998,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04647006255585344,
"step": 444000
},
{
"epoch": 45.67850544036132,
"grad_norm": 0.26579299569129944,
"learning_rate": 4.330384694507303e-05,
"loss": 0.0422,
"step": 445000
},
{
"epoch": 45.67850544036132,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.025033339858055115,
"eval_runtime": 22.2997,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 445000
},
{
"epoch": 45.781153767193594,
"grad_norm": 0.15677900612354279,
"learning_rate": 4.2275252005760136e-05,
"loss": 0.0424,
"step": 446000
},
{
"epoch": 45.781153767193594,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.025280646979808807,
"eval_runtime": 22.1999,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 446000
},
{
"epoch": 45.88380209402587,
"grad_norm": 0.25194716453552246,
"learning_rate": 4.1246657066447236e-05,
"loss": 0.0428,
"step": 447000
},
{
"epoch": 45.88380209402587,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.02510838583111763,
"eval_runtime": 22.1534,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 447000
},
{
"epoch": 45.98645042085814,
"grad_norm": 0.24914862215518951,
"learning_rate": 4.0218062127134336e-05,
"loss": 0.0428,
"step": 448000
},
{
"epoch": 45.98645042085814,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.02476254291832447,
"eval_runtime": 22.087,
"eval_samples_per_second": 4.075,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 448000
},
{
"epoch": 46.089098747690414,
"grad_norm": 0.318974107503891,
"learning_rate": 3.9189467187821436e-05,
"loss": 0.0421,
"step": 449000
},
{
"epoch": 46.089098747690414,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.02481299825012684,
"eval_runtime": 21.9633,
"eval_samples_per_second": 4.098,
"eval_steps_per_second": 0.046,
"eval_wer": 0.043789097408400354,
"step": 449000
},
{
"epoch": 46.19174707452269,
"grad_norm": 0.21947523951530457,
"learning_rate": 3.816087224850854e-05,
"loss": 0.0421,
"step": 450000
},
{
"epoch": 46.19174707452269,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.025007640942931175,
"eval_runtime": 22.0576,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 450000
},
{
"epoch": 46.29439540135496,
"grad_norm": 0.2869652807712555,
"learning_rate": 3.713227730919564e-05,
"loss": 0.0417,
"step": 451000
},
{
"epoch": 46.29439540135496,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.025139357894659042,
"eval_runtime": 22.1428,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 451000
},
{
"epoch": 46.39704372818723,
"grad_norm": 0.24226853251457214,
"learning_rate": 3.610368236988274e-05,
"loss": 0.0423,
"step": 452000
},
{
"epoch": 46.39704372818723,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.02465611696243286,
"eval_runtime": 22.2059,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 452000
},
{
"epoch": 46.4996920550195,
"grad_norm": 0.508613646030426,
"learning_rate": 3.507508743056984e-05,
"loss": 0.0422,
"step": 453000
},
{
"epoch": 46.4996920550195,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.024790233001112938,
"eval_runtime": 22.0074,
"eval_samples_per_second": 4.09,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 453000
},
{
"epoch": 46.602340381851775,
"grad_norm": 0.22070400416851044,
"learning_rate": 3.404649249125694e-05,
"loss": 0.0415,
"step": 454000
},
{
"epoch": 46.602340381851775,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.024966726079583168,
"eval_runtime": 22.0647,
"eval_samples_per_second": 4.079,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 454000
},
{
"epoch": 46.70498870868405,
"grad_norm": 0.19661836326122284,
"learning_rate": 3.301789755194404e-05,
"loss": 0.0424,
"step": 455000
},
{
"epoch": 46.70498870868405,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.024841254577040672,
"eval_runtime": 22.1576,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 455000
},
{
"epoch": 46.80763703551632,
"grad_norm": 0.475782185792923,
"learning_rate": 3.198930261263114e-05,
"loss": 0.0424,
"step": 456000
},
{
"epoch": 46.80763703551632,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.02477007918059826,
"eval_runtime": 22.1397,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 456000
},
{
"epoch": 46.910285362348596,
"grad_norm": 0.36696240305900574,
"learning_rate": 3.096070767331825e-05,
"loss": 0.0426,
"step": 457000
},
{
"epoch": 46.910285362348596,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.024692127481102943,
"eval_runtime": 22.0727,
"eval_samples_per_second": 4.077,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 457000
},
{
"epoch": 47.01293368918087,
"grad_norm": 0.1705227941274643,
"learning_rate": 2.993211273400535e-05,
"loss": 0.0422,
"step": 458000
},
{
"epoch": 47.01293368918087,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.024774568155407906,
"eval_runtime": 22.1972,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 458000
},
{
"epoch": 47.115582016013136,
"grad_norm": 0.21439406275749207,
"learning_rate": 2.8903517794692452e-05,
"loss": 0.0418,
"step": 459000
},
{
"epoch": 47.115582016013136,
"eval_cer": 0.015550947867298577,
"eval_loss": 0.024719279259443283,
"eval_runtime": 22.0766,
"eval_samples_per_second": 4.077,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 459000
},
{
"epoch": 47.21823034284541,
"grad_norm": 0.34435534477233887,
"learning_rate": 2.7874922855379552e-05,
"loss": 0.0415,
"step": 460000
},
{
"epoch": 47.21823034284541,
"eval_cer": 0.015550947867298577,
"eval_loss": 0.024668768048286438,
"eval_runtime": 22.0611,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 460000
},
{
"epoch": 47.32087866967768,
"grad_norm": 0.25130486488342285,
"learning_rate": 2.6846327916066652e-05,
"loss": 0.0417,
"step": 461000
},
{
"epoch": 47.32087866967768,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.024554278701543808,
"eval_runtime": 22.0634,
"eval_samples_per_second": 4.079,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 461000
},
{
"epoch": 47.423526996509956,
"grad_norm": 0.29562491178512573,
"learning_rate": 2.5817732976753755e-05,
"loss": 0.0414,
"step": 462000
},
{
"epoch": 47.423526996509956,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.02472042851150036,
"eval_runtime": 22.1921,
"eval_samples_per_second": 4.055,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 462000
},
{
"epoch": 47.52617532334223,
"grad_norm": 0.4088131785392761,
"learning_rate": 2.4789138037440855e-05,
"loss": 0.0421,
"step": 463000
},
{
"epoch": 47.52617532334223,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.024495158344507217,
"eval_runtime": 22.2123,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 463000
},
{
"epoch": 47.6288236501745,
"grad_norm": 0.27792465686798096,
"learning_rate": 2.3760543098127955e-05,
"loss": 0.0419,
"step": 464000
},
{
"epoch": 47.6288236501745,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.02450607530772686,
"eval_runtime": 22.2255,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 464000
},
{
"epoch": 47.73147197700678,
"grad_norm": 0.19159696996212006,
"learning_rate": 2.2731948158815062e-05,
"loss": 0.042,
"step": 465000
},
{
"epoch": 47.73147197700678,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.02452634647488594,
"eval_runtime": 22.143,
"eval_samples_per_second": 4.064,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 465000
},
{
"epoch": 47.83412030383905,
"grad_norm": 0.2656868100166321,
"learning_rate": 2.1703353219502162e-05,
"loss": 0.0412,
"step": 466000
},
{
"epoch": 47.83412030383905,
"eval_cer": 0.015550947867298577,
"eval_loss": 0.024535449221730232,
"eval_runtime": 22.2342,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 466000
},
{
"epoch": 47.93676863067132,
"grad_norm": 0.15053987503051758,
"learning_rate": 2.0674758280189262e-05,
"loss": 0.0416,
"step": 467000
},
{
"epoch": 47.93676863067132,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.024381397292017937,
"eval_runtime": 22.2314,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 467000
},
{
"epoch": 48.03941695750359,
"grad_norm": 0.2812643051147461,
"learning_rate": 1.9646163340876362e-05,
"loss": 0.0417,
"step": 468000
},
{
"epoch": 48.03941695750359,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.02442990057170391,
"eval_runtime": 22.3372,
"eval_samples_per_second": 4.029,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 468000
},
{
"epoch": 48.142065284335864,
"grad_norm": 0.31642502546310425,
"learning_rate": 1.8617568401563465e-05,
"loss": 0.0419,
"step": 469000
},
{
"epoch": 48.142065284335864,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.024683654308319092,
"eval_runtime": 22.1497,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 469000
},
{
"epoch": 48.24471361116814,
"grad_norm": 0.24842867255210876,
"learning_rate": 1.7588973462250565e-05,
"loss": 0.0413,
"step": 470000
},
{
"epoch": 48.24471361116814,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.0246568750590086,
"eval_runtime": 22.1198,
"eval_samples_per_second": 4.069,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 470000
},
{
"epoch": 48.34736193800041,
"grad_norm": 0.22725574672222137,
"learning_rate": 1.6560378522937665e-05,
"loss": 0.0417,
"step": 471000
},
{
"epoch": 48.34736193800041,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.024483025074005127,
"eval_runtime": 22.0753,
"eval_samples_per_second": 4.077,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 471000
},
{
"epoch": 48.450010264832684,
"grad_norm": 0.415797621011734,
"learning_rate": 1.553178358362477e-05,
"loss": 0.0412,
"step": 472000
},
{
"epoch": 48.450010264832684,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.024604879319667816,
"eval_runtime": 22.2084,
"eval_samples_per_second": 4.053,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 472000
},
{
"epoch": 48.55265859166496,
"grad_norm": 0.4157191216945648,
"learning_rate": 1.450318864431187e-05,
"loss": 0.0416,
"step": 473000
},
{
"epoch": 48.55265859166496,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.024540413171052933,
"eval_runtime": 21.9902,
"eval_samples_per_second": 4.093,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 473000
},
{
"epoch": 48.65530691849723,
"grad_norm": 0.20073458552360535,
"learning_rate": 1.3474593704998972e-05,
"loss": 0.0413,
"step": 474000
},
{
"epoch": 48.65530691849723,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.02452738583087921,
"eval_runtime": 22.0306,
"eval_samples_per_second": 4.085,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 474000
},
{
"epoch": 48.7579552453295,
"grad_norm": 0.3352334499359131,
"learning_rate": 1.2445998765686073e-05,
"loss": 0.0413,
"step": 475000
},
{
"epoch": 48.7579552453295,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.024565977975726128,
"eval_runtime": 22.1491,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 475000
},
{
"epoch": 48.86060357216177,
"grad_norm": 0.20034602284431458,
"learning_rate": 1.1417403826373175e-05,
"loss": 0.0418,
"step": 476000
},
{
"epoch": 48.86060357216177,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.024530308321118355,
"eval_runtime": 22.0802,
"eval_samples_per_second": 4.076,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 476000
},
{
"epoch": 48.963251898994045,
"grad_norm": 0.204274982213974,
"learning_rate": 1.0388808887060275e-05,
"loss": 0.0417,
"step": 477000
},
{
"epoch": 48.963251898994045,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.02437027543783188,
"eval_runtime": 22.051,
"eval_samples_per_second": 4.081,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 477000
},
{
"epoch": 49.06590022582632,
"grad_norm": 0.27974188327789307,
"learning_rate": 9.360213947747377e-06,
"loss": 0.0417,
"step": 478000
},
{
"epoch": 49.06590022582632,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.024484841153025627,
"eval_runtime": 22.1209,
"eval_samples_per_second": 4.069,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 478000
},
{
"epoch": 49.16854855265859,
"grad_norm": 0.15514741837978363,
"learning_rate": 8.33161900843448e-06,
"loss": 0.0413,
"step": 479000
},
{
"epoch": 49.16854855265859,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.024350464344024658,
"eval_runtime": 21.9927,
"eval_samples_per_second": 4.092,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 479000
},
{
"epoch": 49.271196879490866,
"grad_norm": 0.13631823658943176,
"learning_rate": 7.30302406912158e-06,
"loss": 0.0413,
"step": 480000
},
{
"epoch": 49.271196879490866,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.024330323562026024,
"eval_runtime": 22.2136,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 480000
},
{
"epoch": 49.37384520632314,
"grad_norm": 0.18198832869529724,
"learning_rate": 6.274429129808681e-06,
"loss": 0.0409,
"step": 481000
},
{
"epoch": 49.37384520632314,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.024377064779400826,
"eval_runtime": 22.1427,
"eval_samples_per_second": 4.065,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 481000
},
{
"epoch": 49.47649353315541,
"grad_norm": 0.20921219885349274,
"learning_rate": 5.245834190495783e-06,
"loss": 0.0413,
"step": 482000
},
{
"epoch": 49.47649353315541,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.024373607710003853,
"eval_runtime": 22.062,
"eval_samples_per_second": 4.079,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 482000
},
{
"epoch": 49.57914185998768,
"grad_norm": 0.24988599121570587,
"learning_rate": 4.217239251182884e-06,
"loss": 0.0414,
"step": 483000
},
{
"epoch": 49.57914185998768,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.024304602295160294,
"eval_runtime": 12.6617,
"eval_samples_per_second": 7.108,
"eval_steps_per_second": 0.079,
"eval_wer": 0.04647006255585344,
"step": 483000
},
{
"epoch": 49.68179018681995,
"grad_norm": 0.22157305479049683,
"learning_rate": 3.1886443118699856e-06,
"loss": 0.0407,
"step": 484000
},
{
"epoch": 49.68179018681995,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.02429259568452835,
"eval_runtime": 12.7319,
"eval_samples_per_second": 7.069,
"eval_steps_per_second": 0.079,
"eval_wer": 0.04647006255585344,
"step": 484000
},
{
"epoch": 49.784438513652226,
"grad_norm": 0.18318428099155426,
"learning_rate": 2.1600493725570872e-06,
"loss": 0.0413,
"step": 485000
},
{
"epoch": 49.784438513652226,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.024326322600245476,
"eval_runtime": 12.7187,
"eval_samples_per_second": 7.076,
"eval_steps_per_second": 0.079,
"eval_wer": 0.04647006255585344,
"step": 485000
},
{
"epoch": 49.8870868404845,
"grad_norm": 0.23897279798984528,
"learning_rate": 1.1314544332441885e-06,
"loss": 0.0416,
"step": 486000
},
{
"epoch": 49.8870868404845,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.024302508682012558,
"eval_runtime": 12.6346,
"eval_samples_per_second": 7.123,
"eval_steps_per_second": 0.079,
"eval_wer": 0.04736371760500447,
"step": 486000
},
{
"epoch": 49.98973516731677,
"grad_norm": 0.2672029733657837,
"learning_rate": 1.0285949393128985e-07,
"loss": 0.0416,
"step": 487000
},
{
"epoch": 49.98973516731677,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.024304790422320366,
"eval_runtime": 12.659,
"eval_samples_per_second": 7.11,
"eval_steps_per_second": 0.079,
"eval_wer": 0.04736371760500447,
"step": 487000
}
],
"logging_steps": 1000,
"max_steps": 487100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.703420432200581e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}