mana-tts / homo-ge2pe /trainer_state.json
abreza's picture
add ge2pe
eb57aa1
{
"best_metric": 0.012459825724363327,
"best_model_checkpoint": "./phase3-30-ep/checkpoint-473000",
"epoch": 50.0,
"eval_steps": 1000,
"global_step": 487100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10264832683227264,
"grad_norm": 0.17517703771591187,
"learning_rate": 0.0005,
"loss": 0.1055,
"step": 1000
},
{
"epoch": 0.10264832683227264,
"eval_cer": 0.029768957345971563,
"eval_loss": 0.07166381180286407,
"eval_runtime": 21.4774,
"eval_samples_per_second": 4.19,
"eval_steps_per_second": 0.047,
"eval_wer": 0.11796246648793565,
"step": 1000
},
{
"epoch": 0.20529665366454528,
"grad_norm": 0.18113084137439728,
"learning_rate": 0.0004989714050606871,
"loss": 0.0829,
"step": 2000
},
{
"epoch": 0.20529665366454528,
"eval_cer": 0.02754739336492891,
"eval_loss": 0.06128456071019173,
"eval_runtime": 24.2215,
"eval_samples_per_second": 3.716,
"eval_steps_per_second": 0.041,
"eval_wer": 0.10455764075067024,
"step": 2000
},
{
"epoch": 0.3079449804968179,
"grad_norm": 0.31132909655570984,
"learning_rate": 0.0004979428101213742,
"loss": 0.077,
"step": 3000
},
{
"epoch": 0.3079449804968179,
"eval_cer": 0.026954976303317536,
"eval_loss": 0.056063100695610046,
"eval_runtime": 24.595,
"eval_samples_per_second": 3.659,
"eval_steps_per_second": 0.041,
"eval_wer": 0.10187667560321716,
"step": 3000
},
{
"epoch": 0.41059330732909055,
"grad_norm": 0.2209460735321045,
"learning_rate": 0.0004969142151820613,
"loss": 0.0746,
"step": 4000
},
{
"epoch": 0.41059330732909055,
"eval_cer": 0.023548578199052133,
"eval_loss": 0.05421268939971924,
"eval_runtime": 39.13,
"eval_samples_per_second": 2.3,
"eval_steps_per_second": 0.026,
"eval_wer": 0.09204647006255585,
"step": 4000
},
{
"epoch": 0.5132416341613631,
"grad_norm": 0.21206562221050262,
"learning_rate": 0.0004958856202427484,
"loss": 0.0723,
"step": 5000
},
{
"epoch": 0.5132416341613631,
"eval_cer": 0.023548578199052133,
"eval_loss": 0.053859543055295944,
"eval_runtime": 39.8671,
"eval_samples_per_second": 2.258,
"eval_steps_per_second": 0.025,
"eval_wer": 0.0902591599642538,
"step": 5000
},
{
"epoch": 0.6158899609936358,
"grad_norm": 0.1705954223871231,
"learning_rate": 0.0004948570253034355,
"loss": 0.0705,
"step": 6000
},
{
"epoch": 0.6158899609936358,
"eval_cer": 0.0231042654028436,
"eval_loss": 0.05038898065686226,
"eval_runtime": 23.9921,
"eval_samples_per_second": 3.751,
"eval_steps_per_second": 0.042,
"eval_wer": 0.08936550491510277,
"step": 6000
},
{
"epoch": 0.7185382878259085,
"grad_norm": 0.24641267955303192,
"learning_rate": 0.0004938284303641226,
"loss": 0.0693,
"step": 7000
},
{
"epoch": 0.7185382878259085,
"eval_cer": 0.02428909952606635,
"eval_loss": 0.04804808273911476,
"eval_runtime": 25.4073,
"eval_samples_per_second": 3.542,
"eval_steps_per_second": 0.039,
"eval_wer": 0.09204647006255585,
"step": 7000
},
{
"epoch": 0.8211866146581811,
"grad_norm": 0.14618875086307526,
"learning_rate": 0.0004927998354248098,
"loss": 0.0687,
"step": 8000
},
{
"epoch": 0.8211866146581811,
"eval_cer": 0.022067535545023696,
"eval_loss": 0.046750105917453766,
"eval_runtime": 23.0625,
"eval_samples_per_second": 3.902,
"eval_steps_per_second": 0.043,
"eval_wer": 0.08310991957104558,
"step": 8000
},
{
"epoch": 0.9238349414904538,
"grad_norm": 0.2242618203163147,
"learning_rate": 0.0004917712404854969,
"loss": 0.0668,
"step": 9000
},
{
"epoch": 0.9238349414904538,
"eval_cer": 0.02177132701421801,
"eval_loss": 0.046149224042892456,
"eval_runtime": 23.9314,
"eval_samples_per_second": 3.761,
"eval_steps_per_second": 0.042,
"eval_wer": 0.08489722966934764,
"step": 9000
},
{
"epoch": 1.0264832683227263,
"grad_norm": 0.17396153509616852,
"learning_rate": 0.000490742645546184,
"loss": 0.066,
"step": 10000
},
{
"epoch": 1.0264832683227263,
"eval_cer": 0.022363744075829382,
"eval_loss": 0.044813916087150574,
"eval_runtime": 28.3612,
"eval_samples_per_second": 3.173,
"eval_steps_per_second": 0.035,
"eval_wer": 0.08579088471849866,
"step": 10000
},
{
"epoch": 1.129131595154999,
"grad_norm": 0.24261055886745453,
"learning_rate": 0.000489714050606871,
"loss": 0.0632,
"step": 11000
},
{
"epoch": 1.129131595154999,
"eval_cer": 0.02295616113744076,
"eval_loss": 0.04648038372397423,
"eval_runtime": 36.4007,
"eval_samples_per_second": 2.472,
"eval_steps_per_second": 0.027,
"eval_wer": 0.08757819481680071,
"step": 11000
},
{
"epoch": 1.2317799219872716,
"grad_norm": 0.20607537031173706,
"learning_rate": 0.0004886854556675581,
"loss": 0.0639,
"step": 12000
},
{
"epoch": 1.2317799219872716,
"eval_cer": 0.021475118483412322,
"eval_loss": 0.04516833648085594,
"eval_runtime": 40.6247,
"eval_samples_per_second": 2.215,
"eval_steps_per_second": 0.025,
"eval_wer": 0.0840035746201966,
"step": 12000
},
{
"epoch": 1.3344282488195442,
"grad_norm": 0.2226237952709198,
"learning_rate": 0.00048765686072824524,
"loss": 0.0626,
"step": 13000
},
{
"epoch": 1.3344282488195442,
"eval_cer": 0.022363744075829382,
"eval_loss": 0.04331167787313461,
"eval_runtime": 43.1244,
"eval_samples_per_second": 2.087,
"eval_steps_per_second": 0.023,
"eval_wer": 0.08668453976764968,
"step": 13000
},
{
"epoch": 1.437076575651817,
"grad_norm": 0.22998760640621185,
"learning_rate": 0.00048662826578893233,
"loss": 0.0617,
"step": 14000
},
{
"epoch": 1.437076575651817,
"eval_cer": 0.020438388625592416,
"eval_loss": 0.0439009889960289,
"eval_runtime": 40.051,
"eval_samples_per_second": 2.247,
"eval_steps_per_second": 0.025,
"eval_wer": 0.08132260947274352,
"step": 14000
},
{
"epoch": 1.5397249024840896,
"grad_norm": 0.2044006586074829,
"learning_rate": 0.0004855996708496194,
"loss": 0.0612,
"step": 15000
},
{
"epoch": 1.5397249024840896,
"eval_cer": 0.018364928909952605,
"eval_loss": 0.039780329912900925,
"eval_runtime": 40.1221,
"eval_samples_per_second": 2.243,
"eval_steps_per_second": 0.025,
"eval_wer": 0.07149240393208221,
"step": 15000
},
{
"epoch": 1.642373229316362,
"grad_norm": 0.25967568159103394,
"learning_rate": 0.0004845710759103065,
"loss": 0.0619,
"step": 16000
},
{
"epoch": 1.642373229316362,
"eval_cer": 0.021178909952606635,
"eval_loss": 0.04091305658221245,
"eval_runtime": 39.0877,
"eval_samples_per_second": 2.303,
"eval_steps_per_second": 0.026,
"eval_wer": 0.0777479892761394,
"step": 16000
},
{
"epoch": 1.7450215561486346,
"grad_norm": 0.17572972178459167,
"learning_rate": 0.0004835424809709936,
"loss": 0.0617,
"step": 17000
},
{
"epoch": 1.7450215561486346,
"eval_cer": 0.021475118483412322,
"eval_loss": 0.04012183099985123,
"eval_runtime": 39.7698,
"eval_samples_per_second": 2.263,
"eval_steps_per_second": 0.025,
"eval_wer": 0.07864164432529044,
"step": 17000
},
{
"epoch": 1.8476698829809073,
"grad_norm": 0.20715534687042236,
"learning_rate": 0.0004825138860316807,
"loss": 0.0607,
"step": 18000
},
{
"epoch": 1.8476698829809073,
"eval_cer": 0.02177132701421801,
"eval_loss": 0.04150845482945442,
"eval_runtime": 39.4055,
"eval_samples_per_second": 2.284,
"eval_steps_per_second": 0.025,
"eval_wer": 0.08132260947274352,
"step": 18000
},
{
"epoch": 1.95031820981318,
"grad_norm": 0.3426735997200012,
"learning_rate": 0.00048148529109236785,
"loss": 0.0602,
"step": 19000
},
{
"epoch": 1.95031820981318,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.03899341821670532,
"eval_runtime": 39.6568,
"eval_samples_per_second": 2.269,
"eval_steps_per_second": 0.025,
"eval_wer": 0.06881143878462913,
"step": 19000
},
{
"epoch": 2.0529665366454526,
"grad_norm": 0.15929488837718964,
"learning_rate": 0.00048045669615305494,
"loss": 0.0585,
"step": 20000
},
{
"epoch": 2.0529665366454526,
"eval_cer": 0.019994075829383885,
"eval_loss": 0.03957120701670647,
"eval_runtime": 24.5545,
"eval_samples_per_second": 3.665,
"eval_steps_per_second": 0.041,
"eval_wer": 0.07506702412868632,
"step": 20000
},
{
"epoch": 2.1556148634777252,
"grad_norm": 0.18062791228294373,
"learning_rate": 0.00047942810121374204,
"loss": 0.0579,
"step": 21000
},
{
"epoch": 2.1556148634777252,
"eval_cer": 0.019105450236966824,
"eval_loss": 0.039500825107097626,
"eval_runtime": 21.8989,
"eval_samples_per_second": 4.11,
"eval_steps_per_second": 0.046,
"eval_wer": 0.06970509383378017,
"step": 21000
},
{
"epoch": 2.258263190309998,
"grad_norm": 0.20961548388004303,
"learning_rate": 0.00047839950627442913,
"loss": 0.0571,
"step": 22000
},
{
"epoch": 2.258263190309998,
"eval_cer": 0.01925355450236967,
"eval_loss": 0.04062485322356224,
"eval_runtime": 23.0215,
"eval_samples_per_second": 3.909,
"eval_steps_per_second": 0.043,
"eval_wer": 0.0741733690795353,
"step": 22000
},
{
"epoch": 2.3609115171422705,
"grad_norm": 0.1982312947511673,
"learning_rate": 0.0004773709113351162,
"loss": 0.0574,
"step": 23000
},
{
"epoch": 2.3609115171422705,
"eval_cer": 0.0173281990521327,
"eval_loss": 0.03924456238746643,
"eval_runtime": 26.6274,
"eval_samples_per_second": 3.38,
"eval_steps_per_second": 0.038,
"eval_wer": 0.064343163538874,
"step": 23000
},
{
"epoch": 2.463559843974543,
"grad_norm": 0.26111695170402527,
"learning_rate": 0.0004763423163958033,
"loss": 0.0568,
"step": 24000
},
{
"epoch": 2.463559843974543,
"eval_cer": 0.017772511848341232,
"eval_loss": 0.038703735917806625,
"eval_runtime": 23.8875,
"eval_samples_per_second": 3.768,
"eval_steps_per_second": 0.042,
"eval_wer": 0.0679177837354781,
"step": 24000
},
{
"epoch": 2.566208170806816,
"grad_norm": 0.20790116488933563,
"learning_rate": 0.0004753137214564904,
"loss": 0.0571,
"step": 25000
},
{
"epoch": 2.566208170806816,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.03755784407258034,
"eval_runtime": 22.7427,
"eval_samples_per_second": 3.957,
"eval_steps_per_second": 0.044,
"eval_wer": 0.064343163538874,
"step": 25000
},
{
"epoch": 2.6688564976390885,
"grad_norm": 0.16015666723251343,
"learning_rate": 0.00047428512651717756,
"loss": 0.0572,
"step": 26000
},
{
"epoch": 2.6688564976390885,
"eval_cer": 0.017920616113744077,
"eval_loss": 0.03704160824418068,
"eval_runtime": 22.6235,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06613047363717604,
"step": 26000
},
{
"epoch": 2.771504824471361,
"grad_norm": 0.17609256505966187,
"learning_rate": 0.00047325653157786465,
"loss": 0.0566,
"step": 27000
},
{
"epoch": 2.771504824471361,
"eval_cer": 0.01851303317535545,
"eval_loss": 0.03628876060247421,
"eval_runtime": 22.4829,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06702412868632708,
"step": 27000
},
{
"epoch": 2.874153151303634,
"grad_norm": 0.19802771508693695,
"learning_rate": 0.00047222793663855174,
"loss": 0.0568,
"step": 28000
},
{
"epoch": 2.874153151303634,
"eval_cer": 0.016291469194312798,
"eval_loss": 0.03549469634890556,
"eval_runtime": 22.5619,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.058981233243967826,
"step": 28000
},
{
"epoch": 2.9768014781359065,
"grad_norm": 0.19432678818702698,
"learning_rate": 0.00047119934169923884,
"loss": 0.056,
"step": 29000
},
{
"epoch": 2.9768014781359065,
"eval_cer": 0.017180094786729858,
"eval_loss": 0.03459760919213295,
"eval_runtime": 22.4771,
"eval_samples_per_second": 4.004,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06523681858802502,
"step": 29000
},
{
"epoch": 3.079449804968179,
"grad_norm": 0.17181651294231415,
"learning_rate": 0.00047017074675992593,
"loss": 0.0546,
"step": 30000
},
{
"epoch": 3.079449804968179,
"eval_cer": 0.018809241706161137,
"eval_loss": 0.03514665365219116,
"eval_runtime": 23.0403,
"eval_samples_per_second": 3.906,
"eval_steps_per_second": 0.043,
"eval_wer": 0.06881143878462913,
"step": 30000
},
{
"epoch": 3.1820981318004518,
"grad_norm": 0.29426151514053345,
"learning_rate": 0.000469142151820613,
"loss": 0.0545,
"step": 31000
},
{
"epoch": 3.1820981318004518,
"eval_cer": 0.018216824644549764,
"eval_loss": 0.03719107061624527,
"eval_runtime": 24.2578,
"eval_samples_per_second": 3.71,
"eval_steps_per_second": 0.041,
"eval_wer": 0.06881143878462913,
"step": 31000
},
{
"epoch": 3.2847464586327244,
"grad_norm": 0.14310035109519958,
"learning_rate": 0.0004681135568813001,
"loss": 0.0533,
"step": 32000
},
{
"epoch": 3.2847464586327244,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.03583410009741783,
"eval_runtime": 22.3344,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06702412868632708,
"step": 32000
},
{
"epoch": 3.387394785464997,
"grad_norm": 0.16393882036209106,
"learning_rate": 0.00046708496194198726,
"loss": 0.0537,
"step": 33000
},
{
"epoch": 3.387394785464997,
"eval_cer": 0.017476303317535545,
"eval_loss": 0.03669163957238197,
"eval_runtime": 22.2397,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06523681858802502,
"step": 33000
},
{
"epoch": 3.4900431122972697,
"grad_norm": 0.1863625943660736,
"learning_rate": 0.00046605636700267436,
"loss": 0.0542,
"step": 34000
},
{
"epoch": 3.4900431122972697,
"eval_cer": 0.01762440758293839,
"eval_loss": 0.03613027185201645,
"eval_runtime": 22.4068,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 34000
},
{
"epoch": 3.592691439129542,
"grad_norm": 0.1313330978155136,
"learning_rate": 0.00046502777206336145,
"loss": 0.0536,
"step": 35000
},
{
"epoch": 3.592691439129542,
"eval_cer": 0.018216824644549764,
"eval_loss": 0.03634100779891014,
"eval_runtime": 22.5099,
"eval_samples_per_second": 3.998,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06523681858802502,
"step": 35000
},
{
"epoch": 3.6953397659618146,
"grad_norm": 0.15501771867275238,
"learning_rate": 0.00046399917712404854,
"loss": 0.0541,
"step": 36000
},
{
"epoch": 3.6953397659618146,
"eval_cer": 0.016587677725118485,
"eval_loss": 0.03412258252501488,
"eval_runtime": 22.4232,
"eval_samples_per_second": 4.014,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 36000
},
{
"epoch": 3.7979880927940872,
"grad_norm": 0.1870546042919159,
"learning_rate": 0.00046297058218473564,
"loss": 0.0538,
"step": 37000
},
{
"epoch": 3.7979880927940872,
"eval_cer": 0.01762440758293839,
"eval_loss": 0.03531961515545845,
"eval_runtime": 22.4977,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06523681858802502,
"step": 37000
},
{
"epoch": 3.90063641962636,
"grad_norm": 0.1889723688364029,
"learning_rate": 0.00046194198724542273,
"loss": 0.054,
"step": 38000
},
{
"epoch": 3.90063641962636,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.03371370583772659,
"eval_runtime": 24.169,
"eval_samples_per_second": 3.724,
"eval_steps_per_second": 0.041,
"eval_wer": 0.05361930294906166,
"step": 38000
},
{
"epoch": 4.0032847464586325,
"grad_norm": 0.17405888438224792,
"learning_rate": 0.0004609133923061098,
"loss": 0.0535,
"step": 39000
},
{
"epoch": 4.0032847464586325,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.03488326445221901,
"eval_runtime": 23.3428,
"eval_samples_per_second": 3.856,
"eval_steps_per_second": 0.043,
"eval_wer": 0.058981233243967826,
"step": 39000
},
{
"epoch": 4.105933073290905,
"grad_norm": 0.21957945823669434,
"learning_rate": 0.00045988479736679697,
"loss": 0.0519,
"step": 40000
},
{
"epoch": 4.105933073290905,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.03299200162291527,
"eval_runtime": 22.9291,
"eval_samples_per_second": 3.925,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06076854334226988,
"step": 40000
},
{
"epoch": 4.208581400123178,
"grad_norm": 0.1985115259885788,
"learning_rate": 0.00045885620242748406,
"loss": 0.0513,
"step": 41000
},
{
"epoch": 4.208581400123178,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.0352088063955307,
"eval_runtime": 22.3573,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06344950848972297,
"step": 41000
},
{
"epoch": 4.3112297269554505,
"grad_norm": 0.2313787192106247,
"learning_rate": 0.00045782760748817116,
"loss": 0.0515,
"step": 42000
},
{
"epoch": 4.3112297269554505,
"eval_cer": 0.01688388625592417,
"eval_loss": 0.03440188989043236,
"eval_runtime": 23.1819,
"eval_samples_per_second": 3.882,
"eval_steps_per_second": 0.043,
"eval_wer": 0.06344950848972297,
"step": 42000
},
{
"epoch": 4.413878053787723,
"grad_norm": 0.14888563752174377,
"learning_rate": 0.00045679901254885825,
"loss": 0.0512,
"step": 43000
},
{
"epoch": 4.413878053787723,
"eval_cer": 0.017031990521327013,
"eval_loss": 0.03430500999093056,
"eval_runtime": 22.7046,
"eval_samples_per_second": 3.964,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06344950848972297,
"step": 43000
},
{
"epoch": 4.516526380619996,
"grad_norm": 0.1658962070941925,
"learning_rate": 0.00045577041760954534,
"loss": 0.0513,
"step": 44000
},
{
"epoch": 4.516526380619996,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.03472462296485901,
"eval_runtime": 22.7468,
"eval_samples_per_second": 3.957,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05540661304736372,
"step": 44000
},
{
"epoch": 4.619174707452268,
"grad_norm": 0.2193230837583542,
"learning_rate": 0.00045474182267023244,
"loss": 0.0516,
"step": 45000
},
{
"epoch": 4.619174707452268,
"eval_cer": 0.015550947867298577,
"eval_loss": 0.03404483199119568,
"eval_runtime": 22.3387,
"eval_samples_per_second": 4.029,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06166219839142091,
"step": 45000
},
{
"epoch": 4.721823034284541,
"grad_norm": 0.2104436755180359,
"learning_rate": 0.00045371322773091953,
"loss": 0.0515,
"step": 46000
},
{
"epoch": 4.721823034284541,
"eval_cer": 0.015847156398104266,
"eval_loss": 0.033337026834487915,
"eval_runtime": 22.8456,
"eval_samples_per_second": 3.939,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06344950848972297,
"step": 46000
},
{
"epoch": 4.824471361116814,
"grad_norm": 0.18940144777297974,
"learning_rate": 0.0004526846327916067,
"loss": 0.0512,
"step": 47000
},
{
"epoch": 4.824471361116814,
"eval_cer": 0.015995260663507108,
"eval_loss": 0.03273012861609459,
"eval_runtime": 22.7058,
"eval_samples_per_second": 3.964,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0580875781948168,
"step": 47000
},
{
"epoch": 4.927119687949086,
"grad_norm": 0.1933116912841797,
"learning_rate": 0.00045165603785229377,
"loss": 0.0517,
"step": 48000
},
{
"epoch": 4.927119687949086,
"eval_cer": 0.01525473933649289,
"eval_loss": 0.03291744366288185,
"eval_runtime": 22.6758,
"eval_samples_per_second": 3.969,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05987488829311886,
"step": 48000
},
{
"epoch": 5.029768014781359,
"grad_norm": 0.18987509608268738,
"learning_rate": 0.00045062744291298086,
"loss": 0.0508,
"step": 49000
},
{
"epoch": 5.029768014781359,
"eval_cer": 0.017180094786729858,
"eval_loss": 0.03271958604454994,
"eval_runtime": 22.4963,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06255585344057193,
"step": 49000
},
{
"epoch": 5.132416341613632,
"grad_norm": 0.1620320975780487,
"learning_rate": 0.00044959884797366796,
"loss": 0.0491,
"step": 50000
},
{
"epoch": 5.132416341613632,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.03121078759431839,
"eval_runtime": 22.8979,
"eval_samples_per_second": 3.93,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05451295799821269,
"step": 50000
},
{
"epoch": 5.235064668445904,
"grad_norm": 0.1285402774810791,
"learning_rate": 0.00044857025303435505,
"loss": 0.0493,
"step": 51000
},
{
"epoch": 5.235064668445904,
"eval_cer": 0.015550947867298577,
"eval_loss": 0.03293353319168091,
"eval_runtime": 22.5976,
"eval_samples_per_second": 3.983,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06166219839142091,
"step": 51000
},
{
"epoch": 5.337712995278177,
"grad_norm": 0.24566827714443207,
"learning_rate": 0.00044754165809504214,
"loss": 0.0498,
"step": 52000
},
{
"epoch": 5.337712995278177,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.030513830482959747,
"eval_runtime": 22.5054,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05183199285075961,
"step": 52000
},
{
"epoch": 5.44036132211045,
"grad_norm": 0.18935276567935944,
"learning_rate": 0.00044651306315572923,
"loss": 0.0498,
"step": 53000
},
{
"epoch": 5.44036132211045,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.03185874596238136,
"eval_runtime": 22.2794,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0580875781948168,
"step": 53000
},
{
"epoch": 5.543009648942722,
"grad_norm": 0.3019377291202545,
"learning_rate": 0.0004454844682164164,
"loss": 0.0498,
"step": 54000
},
{
"epoch": 5.543009648942722,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.03230896592140198,
"eval_runtime": 22.4205,
"eval_samples_per_second": 4.014,
"eval_steps_per_second": 0.045,
"eval_wer": 0.064343163538874,
"step": 54000
},
{
"epoch": 5.645657975774995,
"grad_norm": 0.19573438167572021,
"learning_rate": 0.0004444558732771035,
"loss": 0.0499,
"step": 55000
},
{
"epoch": 5.645657975774995,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.03094463422894478,
"eval_runtime": 22.3739,
"eval_samples_per_second": 4.023,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 55000
},
{
"epoch": 5.748306302607268,
"grad_norm": 0.19702386856079102,
"learning_rate": 0.00044342727833779057,
"loss": 0.0496,
"step": 56000
},
{
"epoch": 5.748306302607268,
"eval_cer": 0.01643957345971564,
"eval_loss": 0.031046954914927483,
"eval_runtime": 22.342,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.058981233243967826,
"step": 56000
},
{
"epoch": 5.85095462943954,
"grad_norm": 0.21981871128082275,
"learning_rate": 0.00044239868339847766,
"loss": 0.0494,
"step": 57000
},
{
"epoch": 5.85095462943954,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.03140529617667198,
"eval_runtime": 22.392,
"eval_samples_per_second": 4.019,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 57000
},
{
"epoch": 5.953602956271813,
"grad_norm": 0.1707638055086136,
"learning_rate": 0.00044137008845916475,
"loss": 0.0498,
"step": 58000
},
{
"epoch": 5.953602956271813,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.031634390354156494,
"eval_runtime": 22.7229,
"eval_samples_per_second": 3.961,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05451295799821269,
"step": 58000
},
{
"epoch": 6.056251283104086,
"grad_norm": 0.18458805978298187,
"learning_rate": 0.00044034149351985185,
"loss": 0.0481,
"step": 59000
},
{
"epoch": 6.056251283104086,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.03125843033194542,
"eval_runtime": 22.6851,
"eval_samples_per_second": 3.967,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05540661304736372,
"step": 59000
},
{
"epoch": 6.158899609936358,
"grad_norm": 0.176268070936203,
"learning_rate": 0.00043931289858053894,
"loss": 0.048,
"step": 60000
},
{
"epoch": 6.158899609936358,
"eval_cer": 0.013477488151658768,
"eval_loss": 0.0313909687101841,
"eval_runtime": 22.4652,
"eval_samples_per_second": 4.006,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05540661304736372,
"step": 60000
},
{
"epoch": 6.261547936768631,
"grad_norm": 0.21893835067749023,
"learning_rate": 0.0004382843036412261,
"loss": 0.0481,
"step": 61000
},
{
"epoch": 6.261547936768631,
"eval_cer": 0.015402843601895734,
"eval_loss": 0.030466848984360695,
"eval_runtime": 22.4624,
"eval_samples_per_second": 4.007,
"eval_steps_per_second": 0.045,
"eval_wer": 0.06255585344057193,
"step": 61000
},
{
"epoch": 6.3641962636009035,
"grad_norm": 0.17575185000896454,
"learning_rate": 0.0004372557087019132,
"loss": 0.0481,
"step": 62000
},
{
"epoch": 6.3641962636009035,
"eval_cer": 0.012588862559241706,
"eval_loss": 0.029415711760520935,
"eval_runtime": 22.6218,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.044,
"eval_wer": 0.050044682752457555,
"step": 62000
},
{
"epoch": 6.466844590433176,
"grad_norm": 0.21119283139705658,
"learning_rate": 0.0004362271137626003,
"loss": 0.0484,
"step": 63000
},
{
"epoch": 6.466844590433176,
"eval_cer": 0.013329383886255925,
"eval_loss": 0.030311500653624535,
"eval_runtime": 22.4678,
"eval_samples_per_second": 4.006,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 63000
},
{
"epoch": 6.569492917265449,
"grad_norm": 0.20543061196804047,
"learning_rate": 0.0004351985188232874,
"loss": 0.0475,
"step": 64000
},
{
"epoch": 6.569492917265449,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.030118942260742188,
"eval_runtime": 22.5229,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05361930294906166,
"step": 64000
},
{
"epoch": 6.6721412440977215,
"grad_norm": 0.4000137448310852,
"learning_rate": 0.0004341699238839745,
"loss": 0.0481,
"step": 65000
},
{
"epoch": 6.6721412440977215,
"eval_cer": 0.015106635071090047,
"eval_loss": 0.030896518379449844,
"eval_runtime": 22.5328,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0580875781948168,
"step": 65000
},
{
"epoch": 6.774789570929994,
"grad_norm": 0.2505108118057251,
"learning_rate": 0.0004331413289446616,
"loss": 0.0486,
"step": 66000
},
{
"epoch": 6.774789570929994,
"eval_cer": 0.012885071090047393,
"eval_loss": 0.030706828460097313,
"eval_runtime": 22.8491,
"eval_samples_per_second": 3.939,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05093833780160858,
"step": 66000
},
{
"epoch": 6.877437897762267,
"grad_norm": 0.1690637618303299,
"learning_rate": 0.00043211273400534876,
"loss": 0.0478,
"step": 67000
},
{
"epoch": 6.877437897762267,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.02974248118698597,
"eval_runtime": 22.6356,
"eval_samples_per_second": 3.976,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05540661304736372,
"step": 67000
},
{
"epoch": 6.980086224594539,
"grad_norm": 0.2266341745853424,
"learning_rate": 0.00043108413906603585,
"loss": 0.0481,
"step": 68000
},
{
"epoch": 6.980086224594539,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.030792562291026115,
"eval_runtime": 22.6232,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0580875781948168,
"step": 68000
},
{
"epoch": 7.082734551426812,
"grad_norm": 0.2072857916355133,
"learning_rate": 0.00043005554412672294,
"loss": 0.0462,
"step": 69000
},
{
"epoch": 7.082734551426812,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.02916835993528366,
"eval_runtime": 23.5494,
"eval_samples_per_second": 3.822,
"eval_steps_per_second": 0.042,
"eval_wer": 0.05272564789991063,
"step": 69000
},
{
"epoch": 7.185382878259085,
"grad_norm": 0.25637751817703247,
"learning_rate": 0.00042902694918741004,
"loss": 0.0466,
"step": 70000
},
{
"epoch": 7.185382878259085,
"eval_cer": 0.013329383886255925,
"eval_loss": 0.028768625110387802,
"eval_runtime": 23.0948,
"eval_samples_per_second": 3.897,
"eval_steps_per_second": 0.043,
"eval_wer": 0.05451295799821269,
"step": 70000
},
{
"epoch": 7.288031205091357,
"grad_norm": 0.16115036606788635,
"learning_rate": 0.00042799835424809713,
"loss": 0.0465,
"step": 71000
},
{
"epoch": 7.288031205091357,
"eval_cer": 0.013921800947867298,
"eval_loss": 0.029431801289319992,
"eval_runtime": 22.898,
"eval_samples_per_second": 3.93,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05451295799821269,
"step": 71000
},
{
"epoch": 7.39067953192363,
"grad_norm": 0.3072957396507263,
"learning_rate": 0.0004269697593087842,
"loss": 0.0464,
"step": 72000
},
{
"epoch": 7.39067953192363,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.028679879382252693,
"eval_runtime": 22.6337,
"eval_samples_per_second": 3.976,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05183199285075961,
"step": 72000
},
{
"epoch": 7.493327858755903,
"grad_norm": 0.3598809242248535,
"learning_rate": 0.0004259411643694713,
"loss": 0.0472,
"step": 73000
},
{
"epoch": 7.493327858755903,
"eval_cer": 0.013181279620853081,
"eval_loss": 0.030514726415276527,
"eval_runtime": 22.6134,
"eval_samples_per_second": 3.98,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05361930294906166,
"step": 73000
},
{
"epoch": 7.5959761855881744,
"grad_norm": 0.24177242815494537,
"learning_rate": 0.00042491256943015846,
"loss": 0.0461,
"step": 74000
},
{
"epoch": 7.5959761855881744,
"eval_cer": 0.013477488151658768,
"eval_loss": 0.02993646450340748,
"eval_runtime": 22.6695,
"eval_samples_per_second": 3.97,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05183199285075961,
"step": 74000
},
{
"epoch": 7.698624512420448,
"grad_norm": 0.14063900709152222,
"learning_rate": 0.00042388397449084556,
"loss": 0.0469,
"step": 75000
},
{
"epoch": 7.698624512420448,
"eval_cer": 0.013773696682464455,
"eval_loss": 0.02932840585708618,
"eval_runtime": 22.5476,
"eval_samples_per_second": 3.992,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05451295799821269,
"step": 75000
},
{
"epoch": 7.80127283925272,
"grad_norm": 0.20371408760547638,
"learning_rate": 0.00042285537955153265,
"loss": 0.0473,
"step": 76000
},
{
"epoch": 7.80127283925272,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.029385393485426903,
"eval_runtime": 22.4143,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 76000
},
{
"epoch": 7.903921166084993,
"grad_norm": 0.17325064539909363,
"learning_rate": 0.00042182678461221974,
"loss": 0.047,
"step": 77000
},
{
"epoch": 7.903921166084993,
"eval_cer": 0.012144549763033176,
"eval_loss": 0.02776852808892727,
"eval_runtime": 22.3852,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 77000
},
{
"epoch": 8.006569492917265,
"grad_norm": 0.261836975812912,
"learning_rate": 0.00042079818967290683,
"loss": 0.0458,
"step": 78000
},
{
"epoch": 8.006569492917265,
"eval_cer": 0.01229265402843602,
"eval_loss": 0.02698938362300396,
"eval_runtime": 22.4901,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05093833780160858,
"step": 78000
},
{
"epoch": 8.109217819749539,
"grad_norm": 0.24788102507591248,
"learning_rate": 0.00041976959473359393,
"loss": 0.0448,
"step": 79000
},
{
"epoch": 8.109217819749539,
"eval_cer": 0.014069905213270142,
"eval_loss": 0.02948344498872757,
"eval_runtime": 22.4348,
"eval_samples_per_second": 4.012,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05719392314566577,
"step": 79000
},
{
"epoch": 8.21186614658181,
"grad_norm": 0.22888223826885223,
"learning_rate": 0.000418740999794281,
"loss": 0.0455,
"step": 80000
},
{
"epoch": 8.21186614658181,
"eval_cer": 0.012440758293838863,
"eval_loss": 0.027837086468935013,
"eval_runtime": 22.6805,
"eval_samples_per_second": 3.968,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04825737265415549,
"step": 80000
},
{
"epoch": 8.314514473414084,
"grad_norm": 0.15464870631694794,
"learning_rate": 0.00041771240485496817,
"loss": 0.0456,
"step": 81000
},
{
"epoch": 8.314514473414084,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.03025979734957218,
"eval_runtime": 22.4815,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.06076854334226988,
"step": 81000
},
{
"epoch": 8.417162800246356,
"grad_norm": 0.2563960552215576,
"learning_rate": 0.00041668380991565526,
"loss": 0.0449,
"step": 82000
},
{
"epoch": 8.417162800246356,
"eval_cer": 0.013329383886255925,
"eval_loss": 0.028577908873558044,
"eval_runtime": 22.508,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05093833780160858,
"step": 82000
},
{
"epoch": 8.51981112707863,
"grad_norm": 0.2178841084241867,
"learning_rate": 0.00041565521497634235,
"loss": 0.0453,
"step": 83000
},
{
"epoch": 8.51981112707863,
"eval_cer": 0.011700236966824644,
"eval_loss": 0.02777865342795849,
"eval_runtime": 22.4852,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04736371760500447,
"step": 83000
},
{
"epoch": 8.622459453910901,
"grad_norm": 0.16487497091293335,
"learning_rate": 0.00041462662003702945,
"loss": 0.0452,
"step": 84000
},
{
"epoch": 8.622459453910901,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.02851984277367592,
"eval_runtime": 22.544,
"eval_samples_per_second": 3.992,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05451295799821269,
"step": 84000
},
{
"epoch": 8.725107780743175,
"grad_norm": 0.1772727370262146,
"learning_rate": 0.00041359802509771654,
"loss": 0.0455,
"step": 85000
},
{
"epoch": 8.725107780743175,
"eval_cer": 0.013477488151658768,
"eval_loss": 0.027403153479099274,
"eval_runtime": 22.2753,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05272564789991063,
"step": 85000
},
{
"epoch": 8.827756107575446,
"grad_norm": 0.2657695710659027,
"learning_rate": 0.00041256943015840363,
"loss": 0.0454,
"step": 86000
},
{
"epoch": 8.827756107575446,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.027919290587306023,
"eval_runtime": 22.8782,
"eval_samples_per_second": 3.934,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05630026809651475,
"step": 86000
},
{
"epoch": 8.93040443440772,
"grad_norm": 0.18787504732608795,
"learning_rate": 0.00041154083521909073,
"loss": 0.0455,
"step": 87000
},
{
"epoch": 8.93040443440772,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.027214767411351204,
"eval_runtime": 22.7677,
"eval_samples_per_second": 3.953,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05630026809651475,
"step": 87000
},
{
"epoch": 9.033052761239992,
"grad_norm": 0.21097755432128906,
"learning_rate": 0.0004105122402797779,
"loss": 0.0451,
"step": 88000
},
{
"epoch": 9.033052761239992,
"eval_cer": 0.01273696682464455,
"eval_loss": 0.02636747434735298,
"eval_runtime": 22.9582,
"eval_samples_per_second": 3.92,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05183199285075961,
"step": 88000
},
{
"epoch": 9.135701088072265,
"grad_norm": 0.17829887568950653,
"learning_rate": 0.00040948364534046497,
"loss": 0.0437,
"step": 89000
},
{
"epoch": 9.135701088072265,
"eval_cer": 0.01110781990521327,
"eval_loss": 0.026946688070893288,
"eval_runtime": 22.62,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.044,
"eval_wer": 0.045576407506702415,
"step": 89000
},
{
"epoch": 9.238349414904537,
"grad_norm": 0.1892678588628769,
"learning_rate": 0.00040845505040115206,
"loss": 0.0436,
"step": 90000
},
{
"epoch": 9.238349414904537,
"eval_cer": 0.013329383886255925,
"eval_loss": 0.0261703971773386,
"eval_runtime": 22.7469,
"eval_samples_per_second": 3.957,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05183199285075961,
"step": 90000
},
{
"epoch": 9.34099774173681,
"grad_norm": 0.1827981173992157,
"learning_rate": 0.00040742645546183915,
"loss": 0.0442,
"step": 91000
},
{
"epoch": 9.34099774173681,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.026994889602065086,
"eval_runtime": 22.3199,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05361930294906166,
"step": 91000
},
{
"epoch": 9.443646068569082,
"grad_norm": 0.26229721307754517,
"learning_rate": 0.00040639786052252625,
"loss": 0.0442,
"step": 92000
},
{
"epoch": 9.443646068569082,
"eval_cer": 0.012588862559241706,
"eval_loss": 0.026131337508559227,
"eval_runtime": 30.6242,
"eval_samples_per_second": 2.939,
"eval_steps_per_second": 0.033,
"eval_wer": 0.050044682752457555,
"step": 92000
},
{
"epoch": 9.546294395401356,
"grad_norm": 0.31516391038894653,
"learning_rate": 0.00040536926558321334,
"loss": 0.0442,
"step": 93000
},
{
"epoch": 9.546294395401356,
"eval_cer": 0.012588862559241706,
"eval_loss": 0.023602332919836044,
"eval_runtime": 27.5058,
"eval_samples_per_second": 3.272,
"eval_steps_per_second": 0.036,
"eval_wer": 0.049151027703306524,
"step": 93000
},
{
"epoch": 9.648942722233627,
"grad_norm": 0.19427119195461273,
"learning_rate": 0.00040434067064390043,
"loss": 0.0443,
"step": 94000
},
{
"epoch": 9.648942722233627,
"eval_cer": 0.013033175355450236,
"eval_loss": 0.02646990306675434,
"eval_runtime": 27.6213,
"eval_samples_per_second": 3.258,
"eval_steps_per_second": 0.036,
"eval_wer": 0.049151027703306524,
"step": 94000
},
{
"epoch": 9.751591049065901,
"grad_norm": 0.26338282227516174,
"learning_rate": 0.0004033120757045876,
"loss": 0.0447,
"step": 95000
},
{
"epoch": 9.751591049065901,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.025444395840168,
"eval_runtime": 25.0834,
"eval_samples_per_second": 3.588,
"eval_steps_per_second": 0.04,
"eval_wer": 0.05272564789991063,
"step": 95000
},
{
"epoch": 9.854239375898173,
"grad_norm": 0.25808289647102356,
"learning_rate": 0.0004022834807652747,
"loss": 0.0445,
"step": 96000
},
{
"epoch": 9.854239375898173,
"eval_cer": 0.014958530805687204,
"eval_loss": 0.026209862902760506,
"eval_runtime": 23.4245,
"eval_samples_per_second": 3.842,
"eval_steps_per_second": 0.043,
"eval_wer": 0.05361930294906166,
"step": 96000
},
{
"epoch": 9.956887702730446,
"grad_norm": 0.18842875957489014,
"learning_rate": 0.00040125488582596177,
"loss": 0.0445,
"step": 97000
},
{
"epoch": 9.956887702730446,
"eval_cer": 0.012440758293838863,
"eval_loss": 0.027286237105727196,
"eval_runtime": 22.6774,
"eval_samples_per_second": 3.969,
"eval_steps_per_second": 0.044,
"eval_wer": 0.050044682752457555,
"step": 97000
},
{
"epoch": 10.059536029562718,
"grad_norm": 0.2742888331413269,
"learning_rate": 0.00040022629088664886,
"loss": 0.0432,
"step": 98000
},
{
"epoch": 10.059536029562718,
"eval_cer": 0.011404028436018957,
"eval_loss": 0.027185438200831413,
"eval_runtime": 27.5005,
"eval_samples_per_second": 3.273,
"eval_steps_per_second": 0.036,
"eval_wer": 0.04736371760500447,
"step": 98000
},
{
"epoch": 10.162184356394992,
"grad_norm": 0.20179295539855957,
"learning_rate": 0.00039919769594733595,
"loss": 0.0428,
"step": 99000
},
{
"epoch": 10.162184356394992,
"eval_cer": 0.01481042654028436,
"eval_loss": 0.02597665973007679,
"eval_runtime": 25.1306,
"eval_samples_per_second": 3.581,
"eval_steps_per_second": 0.04,
"eval_wer": 0.04736371760500447,
"step": 99000
},
{
"epoch": 10.264832683227263,
"grad_norm": 0.16221770644187927,
"learning_rate": 0.00039816910100802305,
"loss": 0.0429,
"step": 100000
},
{
"epoch": 10.264832683227263,
"eval_cer": 0.011848341232227487,
"eval_loss": 0.027527980506420135,
"eval_runtime": 23.4033,
"eval_samples_per_second": 3.846,
"eval_steps_per_second": 0.043,
"eval_wer": 0.04736371760500447,
"step": 100000
},
{
"epoch": 10.367481010059535,
"grad_norm": 0.17623300850391388,
"learning_rate": 0.00039714050606871014,
"loss": 0.0435,
"step": 101000
},
{
"epoch": 10.367481010059535,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.026551930233836174,
"eval_runtime": 23.1192,
"eval_samples_per_second": 3.893,
"eval_steps_per_second": 0.043,
"eval_wer": 0.05361930294906166,
"step": 101000
},
{
"epoch": 10.470129336891809,
"grad_norm": 0.29380717873573303,
"learning_rate": 0.0003961119111293973,
"loss": 0.0431,
"step": 102000
},
{
"epoch": 10.470129336891809,
"eval_cer": 0.017476303317535545,
"eval_loss": 0.026500999927520752,
"eval_runtime": 23.0878,
"eval_samples_per_second": 3.898,
"eval_steps_per_second": 0.043,
"eval_wer": 0.05361930294906166,
"step": 102000
},
{
"epoch": 10.572777663724082,
"grad_norm": 0.19392183423042297,
"learning_rate": 0.0003950833161900844,
"loss": 0.043,
"step": 103000
},
{
"epoch": 10.572777663724082,
"eval_cer": 0.013033175355450236,
"eval_loss": 0.02609255537390709,
"eval_runtime": 22.8011,
"eval_samples_per_second": 3.947,
"eval_steps_per_second": 0.044,
"eval_wer": 0.049151027703306524,
"step": 103000
},
{
"epoch": 10.675425990556354,
"grad_norm": 0.17925652861595154,
"learning_rate": 0.0003940547212507715,
"loss": 0.0433,
"step": 104000
},
{
"epoch": 10.675425990556354,
"eval_cer": 0.01273696682464455,
"eval_loss": 0.027248414233326912,
"eval_runtime": 22.7524,
"eval_samples_per_second": 3.956,
"eval_steps_per_second": 0.044,
"eval_wer": 0.050044682752457555,
"step": 104000
},
{
"epoch": 10.778074317388626,
"grad_norm": 0.21368491649627686,
"learning_rate": 0.00039302612631145857,
"loss": 0.0431,
"step": 105000
},
{
"epoch": 10.778074317388626,
"eval_cer": 0.01273696682464455,
"eval_loss": 0.025399256497621536,
"eval_runtime": 22.4849,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.050044682752457555,
"step": 105000
},
{
"epoch": 10.8807226442209,
"grad_norm": 0.17027121782302856,
"learning_rate": 0.00039199753137214566,
"loss": 0.0435,
"step": 106000
},
{
"epoch": 10.8807226442209,
"eval_cer": 0.01229265402843602,
"eval_loss": 0.025961685925722122,
"eval_runtime": 22.3736,
"eval_samples_per_second": 4.023,
"eval_steps_per_second": 0.045,
"eval_wer": 0.050044682752457555,
"step": 106000
},
{
"epoch": 10.983370971053171,
"grad_norm": 0.3062898516654968,
"learning_rate": 0.00039096893643283275,
"loss": 0.0434,
"step": 107000
},
{
"epoch": 10.983370971053171,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.0248806644231081,
"eval_runtime": 22.4291,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 107000
},
{
"epoch": 11.086019297885445,
"grad_norm": 0.27476412057876587,
"learning_rate": 0.00038994034149351985,
"loss": 0.0418,
"step": 108000
},
{
"epoch": 11.086019297885445,
"eval_cer": 0.011700236966824644,
"eval_loss": 0.025811193510890007,
"eval_runtime": 22.3629,
"eval_samples_per_second": 4.025,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 108000
},
{
"epoch": 11.188667624717716,
"grad_norm": 0.18025143444538116,
"learning_rate": 0.000388911746554207,
"loss": 0.0419,
"step": 109000
},
{
"epoch": 11.188667624717716,
"eval_cer": 0.01066350710900474,
"eval_loss": 0.024703815579414368,
"eval_runtime": 22.2656,
"eval_samples_per_second": 4.042,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 109000
},
{
"epoch": 11.29131595154999,
"grad_norm": 0.19146864116191864,
"learning_rate": 0.0003878831516148941,
"loss": 0.042,
"step": 110000
},
{
"epoch": 11.29131595154999,
"eval_cer": 0.011404028436018957,
"eval_loss": 0.025849131867289543,
"eval_runtime": 22.6201,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04647006255585344,
"step": 110000
},
{
"epoch": 11.393964278382262,
"grad_norm": 0.4587384760379791,
"learning_rate": 0.0003868545566755812,
"loss": 0.042,
"step": 111000
},
{
"epoch": 11.393964278382262,
"eval_cer": 0.012440758293838863,
"eval_loss": 0.025229139253497124,
"eval_runtime": 22.5628,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.050044682752457555,
"step": 111000
},
{
"epoch": 11.496612605214535,
"grad_norm": 0.1752750277519226,
"learning_rate": 0.0003858259617362683,
"loss": 0.0428,
"step": 112000
},
{
"epoch": 11.496612605214535,
"eval_cer": 0.01066350710900474,
"eval_loss": 0.02614370547235012,
"eval_runtime": 22.3268,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 112000
},
{
"epoch": 11.599260932046807,
"grad_norm": 0.20651549100875854,
"learning_rate": 0.00038479736679695537,
"loss": 0.0428,
"step": 113000
},
{
"epoch": 11.599260932046807,
"eval_cer": 0.01110781990521327,
"eval_loss": 0.02591308392584324,
"eval_runtime": 22.6162,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04647006255585344,
"step": 113000
},
{
"epoch": 11.70190925887908,
"grad_norm": 0.1839723438024521,
"learning_rate": 0.00038376877185764246,
"loss": 0.0422,
"step": 114000
},
{
"epoch": 11.70190925887908,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.026186056435108185,
"eval_runtime": 22.64,
"eval_samples_per_second": 3.975,
"eval_steps_per_second": 0.044,
"eval_wer": 0.044682752457551385,
"step": 114000
},
{
"epoch": 11.804557585711352,
"grad_norm": 0.1559193879365921,
"learning_rate": 0.00038274017691832955,
"loss": 0.0426,
"step": 115000
},
{
"epoch": 11.804557585711352,
"eval_cer": 0.011996445497630332,
"eval_loss": 0.026291608810424805,
"eval_runtime": 23.0438,
"eval_samples_per_second": 3.906,
"eval_steps_per_second": 0.043,
"eval_wer": 0.045576407506702415,
"step": 115000
},
{
"epoch": 11.907205912543626,
"grad_norm": 0.33867180347442627,
"learning_rate": 0.0003817115819790167,
"loss": 0.0422,
"step": 116000
},
{
"epoch": 11.907205912543626,
"eval_cer": 0.011996445497630332,
"eval_loss": 0.025052817538380623,
"eval_runtime": 22.5162,
"eval_samples_per_second": 3.997,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04647006255585344,
"step": 116000
},
{
"epoch": 12.009854239375898,
"grad_norm": 0.2387935370206833,
"learning_rate": 0.0003806829870397038,
"loss": 0.042,
"step": 117000
},
{
"epoch": 12.009854239375898,
"eval_cer": 0.011404028436018957,
"eval_loss": 0.02570049837231636,
"eval_runtime": 22.5888,
"eval_samples_per_second": 3.984,
"eval_steps_per_second": 0.044,
"eval_wer": 0.044682752457551385,
"step": 117000
},
{
"epoch": 12.112502566208171,
"grad_norm": 0.1758970469236374,
"learning_rate": 0.0003796543921003909,
"loss": 0.0406,
"step": 118000
},
{
"epoch": 12.112502566208171,
"eval_cer": 0.01273696682464455,
"eval_loss": 0.025880787521600723,
"eval_runtime": 22.5099,
"eval_samples_per_second": 3.998,
"eval_steps_per_second": 0.044,
"eval_wer": 0.049151027703306524,
"step": 118000
},
{
"epoch": 12.215150893040443,
"grad_norm": 0.2268359512090683,
"learning_rate": 0.000378625797161078,
"loss": 0.0409,
"step": 119000
},
{
"epoch": 12.215150893040443,
"eval_cer": 0.012588862559241706,
"eval_loss": 0.024862516671419144,
"eval_runtime": 22.7569,
"eval_samples_per_second": 3.955,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05093833780160858,
"step": 119000
},
{
"epoch": 12.317799219872716,
"grad_norm": 0.15519174933433533,
"learning_rate": 0.0003775972022217651,
"loss": 0.0412,
"step": 120000
},
{
"epoch": 12.317799219872716,
"eval_cer": 0.0115521327014218,
"eval_loss": 0.025986041873693466,
"eval_runtime": 22.6794,
"eval_samples_per_second": 3.968,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04736371760500447,
"step": 120000
},
{
"epoch": 12.420447546704988,
"grad_norm": 0.3455216884613037,
"learning_rate": 0.00037656860728245217,
"loss": 0.0411,
"step": 121000
},
{
"epoch": 12.420447546704988,
"eval_cer": 0.011700236966824644,
"eval_loss": 0.025649528950452805,
"eval_runtime": 22.7315,
"eval_samples_per_second": 3.959,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04647006255585344,
"step": 121000
},
{
"epoch": 12.523095873537262,
"grad_norm": 0.20411798357963562,
"learning_rate": 0.00037554001234313926,
"loss": 0.0418,
"step": 122000
},
{
"epoch": 12.523095873537262,
"eval_cer": 0.014662322274881517,
"eval_loss": 0.02615606226027012,
"eval_runtime": 22.7606,
"eval_samples_per_second": 3.954,
"eval_steps_per_second": 0.044,
"eval_wer": 0.05361930294906166,
"step": 122000
},
{
"epoch": 12.625744200369533,
"grad_norm": 0.25552111864089966,
"learning_rate": 0.0003745114174038264,
"loss": 0.0414,
"step": 123000
},
{
"epoch": 12.625744200369533,
"eval_cer": 0.01273696682464455,
"eval_loss": 0.024597780779004097,
"eval_runtime": 22.7241,
"eval_samples_per_second": 3.961,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04825737265415549,
"step": 123000
},
{
"epoch": 12.728392527201807,
"grad_norm": 0.24297872185707092,
"learning_rate": 0.0003734828224645135,
"loss": 0.0416,
"step": 124000
},
{
"epoch": 12.728392527201807,
"eval_cer": 0.011996445497630332,
"eval_loss": 0.0245036818087101,
"eval_runtime": 22.3574,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 124000
},
{
"epoch": 12.831040854034079,
"grad_norm": 0.16708943247795105,
"learning_rate": 0.0003724542275252006,
"loss": 0.0421,
"step": 125000
},
{
"epoch": 12.831040854034079,
"eval_cer": 0.010959715639810427,
"eval_loss": 0.023803560063242912,
"eval_runtime": 22.1003,
"eval_samples_per_second": 4.072,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 125000
},
{
"epoch": 12.933689180866352,
"grad_norm": 0.2480056881904602,
"learning_rate": 0.0003714256325858877,
"loss": 0.0417,
"step": 126000
},
{
"epoch": 12.933689180866352,
"eval_cer": 0.010959715639810427,
"eval_loss": 0.02339034155011177,
"eval_runtime": 22.1887,
"eval_samples_per_second": 4.056,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 126000
},
{
"epoch": 13.036337507698624,
"grad_norm": 0.15843307971954346,
"learning_rate": 0.0003703970376465748,
"loss": 0.041,
"step": 127000
},
{
"epoch": 13.036337507698624,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.023780081421136856,
"eval_runtime": 22.1307,
"eval_samples_per_second": 4.067,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04289544235924933,
"step": 127000
},
{
"epoch": 13.138985834530898,
"grad_norm": 0.17070743441581726,
"learning_rate": 0.00036936844270726187,
"loss": 0.0397,
"step": 128000
},
{
"epoch": 13.138985834530898,
"eval_cer": 0.014514218009478674,
"eval_loss": 0.0229303240776062,
"eval_runtime": 22.379,
"eval_samples_per_second": 4.022,
"eval_steps_per_second": 0.045,
"eval_wer": 0.05093833780160858,
"step": 128000
},
{
"epoch": 13.24163416136317,
"grad_norm": 0.1719464212656021,
"learning_rate": 0.00036833984776794897,
"loss": 0.0405,
"step": 129000
},
{
"epoch": 13.24163416136317,
"eval_cer": 0.014218009478672985,
"eval_loss": 0.023020587861537933,
"eval_runtime": 22.2447,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 129000
},
{
"epoch": 13.344282488195443,
"grad_norm": 0.16199146211147308,
"learning_rate": 0.0003673112528286361,
"loss": 0.0405,
"step": 130000
},
{
"epoch": 13.344282488195443,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.023113010451197624,
"eval_runtime": 22.549,
"eval_samples_per_second": 3.991,
"eval_steps_per_second": 0.044,
"eval_wer": 0.043789097408400354,
"step": 130000
},
{
"epoch": 13.446930815027715,
"grad_norm": 0.21660035848617554,
"learning_rate": 0.0003662826578893232,
"loss": 0.0406,
"step": 131000
},
{
"epoch": 13.446930815027715,
"eval_cer": 0.01229265402843602,
"eval_loss": 0.023945845663547516,
"eval_runtime": 22.3889,
"eval_samples_per_second": 4.02,
"eval_steps_per_second": 0.045,
"eval_wer": 0.049151027703306524,
"step": 131000
},
{
"epoch": 13.549579141859988,
"grad_norm": 0.3124329447746277,
"learning_rate": 0.0003652540629500103,
"loss": 0.0406,
"step": 132000
},
{
"epoch": 13.549579141859988,
"eval_cer": 0.01110781990521327,
"eval_loss": 0.024438710883259773,
"eval_runtime": 22.5799,
"eval_samples_per_second": 3.986,
"eval_steps_per_second": 0.044,
"eval_wer": 0.043789097408400354,
"step": 132000
},
{
"epoch": 13.65222746869226,
"grad_norm": 0.16738218069076538,
"learning_rate": 0.0003642254680106974,
"loss": 0.0406,
"step": 133000
},
{
"epoch": 13.65222746869226,
"eval_cer": 0.011848341232227487,
"eval_loss": 0.024815011769533157,
"eval_runtime": 22.3814,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 133000
},
{
"epoch": 13.754875795524534,
"grad_norm": 0.1927761733531952,
"learning_rate": 0.0003631968730713845,
"loss": 0.0407,
"step": 134000
},
{
"epoch": 13.754875795524534,
"eval_cer": 0.010959715639810427,
"eval_loss": 0.023673338815569878,
"eval_runtime": 22.7336,
"eval_samples_per_second": 3.959,
"eval_steps_per_second": 0.044,
"eval_wer": 0.045576407506702415,
"step": 134000
},
{
"epoch": 13.857524122356805,
"grad_norm": 0.17141355574131012,
"learning_rate": 0.0003621682781320716,
"loss": 0.0411,
"step": 135000
},
{
"epoch": 13.857524122356805,
"eval_cer": 0.010959715639810427,
"eval_loss": 0.02259986102581024,
"eval_runtime": 22.6132,
"eval_samples_per_second": 3.98,
"eval_steps_per_second": 0.044,
"eval_wer": 0.041108132260947276,
"step": 135000
},
{
"epoch": 13.960172449189079,
"grad_norm": 0.24508166313171387,
"learning_rate": 0.00036113968319275867,
"loss": 0.0405,
"step": 136000
},
{
"epoch": 13.960172449189079,
"eval_cer": 0.01066350710900474,
"eval_loss": 0.022703783586621284,
"eval_runtime": 22.7539,
"eval_samples_per_second": 3.955,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04289544235924933,
"step": 136000
},
{
"epoch": 14.06282077602135,
"grad_norm": 0.16717751324176788,
"learning_rate": 0.0003601110882534458,
"loss": 0.0401,
"step": 137000
},
{
"epoch": 14.06282077602135,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.02189534902572632,
"eval_runtime": 22.4478,
"eval_samples_per_second": 4.009,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 137000
},
{
"epoch": 14.165469102853624,
"grad_norm": 0.42751288414001465,
"learning_rate": 0.0003590824933141329,
"loss": 0.0391,
"step": 138000
},
{
"epoch": 14.165469102853624,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.022342221811413765,
"eval_runtime": 22.5149,
"eval_samples_per_second": 3.997,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 138000
},
{
"epoch": 14.268117429685896,
"grad_norm": 0.2213069647550583,
"learning_rate": 0.00035805389837482,
"loss": 0.0394,
"step": 139000
},
{
"epoch": 14.268117429685896,
"eval_cer": 0.010959715639810427,
"eval_loss": 0.021898576989769936,
"eval_runtime": 22.2749,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 139000
},
{
"epoch": 14.37076575651817,
"grad_norm": 0.17157946527004242,
"learning_rate": 0.0003570253034355071,
"loss": 0.0395,
"step": 140000
},
{
"epoch": 14.37076575651817,
"eval_cer": 0.010811611374407584,
"eval_loss": 0.023508407175540924,
"eval_runtime": 22.3534,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 140000
},
{
"epoch": 14.473414083350441,
"grad_norm": 0.26436519622802734,
"learning_rate": 0.0003559967084961942,
"loss": 0.0398,
"step": 141000
},
{
"epoch": 14.473414083350441,
"eval_cer": 0.011255924170616114,
"eval_loss": 0.022889673709869385,
"eval_runtime": 22.2676,
"eval_samples_per_second": 4.042,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 141000
},
{
"epoch": 14.576062410182715,
"grad_norm": 0.15638813376426697,
"learning_rate": 0.0003549681135568813,
"loss": 0.0399,
"step": 142000
},
{
"epoch": 14.576062410182715,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.022171661257743835,
"eval_runtime": 22.6709,
"eval_samples_per_second": 3.97,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0420017873100983,
"step": 142000
},
{
"epoch": 14.678710737014987,
"grad_norm": 0.22069737315177917,
"learning_rate": 0.0003539395186175684,
"loss": 0.0397,
"step": 143000
},
{
"epoch": 14.678710737014987,
"eval_cer": 0.014366113744075829,
"eval_loss": 0.02216203510761261,
"eval_runtime": 22.5252,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04647006255585344,
"step": 143000
},
{
"epoch": 14.78135906384726,
"grad_norm": 0.25842490792274475,
"learning_rate": 0.0003529109236782555,
"loss": 0.0398,
"step": 144000
},
{
"epoch": 14.78135906384726,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.023699576035141945,
"eval_runtime": 22.5737,
"eval_samples_per_second": 3.987,
"eval_steps_per_second": 0.044,
"eval_wer": 0.044682752457551385,
"step": 144000
},
{
"epoch": 14.884007390679532,
"grad_norm": 0.2184634506702423,
"learning_rate": 0.0003518823287389426,
"loss": 0.0402,
"step": 145000
},
{
"epoch": 14.884007390679532,
"eval_cer": 0.011255924170616114,
"eval_loss": 0.022893035784363747,
"eval_runtime": 22.3846,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 145000
},
{
"epoch": 14.986655717511805,
"grad_norm": 0.19810239970684052,
"learning_rate": 0.0003508537337996297,
"loss": 0.0403,
"step": 146000
},
{
"epoch": 14.986655717511805,
"eval_cer": 0.011996445497630332,
"eval_loss": 0.024159209802746773,
"eval_runtime": 22.5604,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04736371760500447,
"step": 146000
},
{
"epoch": 15.089304044344077,
"grad_norm": 0.2137177586555481,
"learning_rate": 0.0003498251388603168,
"loss": 0.0385,
"step": 147000
},
{
"epoch": 15.089304044344077,
"eval_cer": 0.010811611374407584,
"eval_loss": 0.022794917225837708,
"eval_runtime": 22.3128,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 147000
},
{
"epoch": 15.19195237117635,
"grad_norm": 0.1722225844860077,
"learning_rate": 0.0003487965439210039,
"loss": 0.0386,
"step": 148000
},
{
"epoch": 15.19195237117635,
"eval_cer": 0.011848341232227487,
"eval_loss": 0.02336839959025383,
"eval_runtime": 22.3982,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 148000
},
{
"epoch": 15.294600698008622,
"grad_norm": 0.20236076414585114,
"learning_rate": 0.000347767948981691,
"loss": 0.0392,
"step": 149000
},
{
"epoch": 15.294600698008622,
"eval_cer": 0.01229265402843602,
"eval_loss": 0.02401108108460903,
"eval_runtime": 22.0603,
"eval_samples_per_second": 4.08,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 149000
},
{
"epoch": 15.397249024840896,
"grad_norm": 0.1955161690711975,
"learning_rate": 0.0003467393540423781,
"loss": 0.039,
"step": 150000
},
{
"epoch": 15.397249024840896,
"eval_cer": 0.010811611374407584,
"eval_loss": 0.022156517952680588,
"eval_runtime": 22.3641,
"eval_samples_per_second": 4.024,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 150000
},
{
"epoch": 15.499897351673168,
"grad_norm": 0.24897447228431702,
"learning_rate": 0.00034571075910306523,
"loss": 0.0391,
"step": 151000
},
{
"epoch": 15.499897351673168,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.022661181166768074,
"eval_runtime": 22.3484,
"eval_samples_per_second": 4.027,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04289544235924933,
"step": 151000
},
{
"epoch": 15.60254567850544,
"grad_norm": 0.1920953094959259,
"learning_rate": 0.0003446821641637523,
"loss": 0.0394,
"step": 152000
},
{
"epoch": 15.60254567850544,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.021091148257255554,
"eval_runtime": 22.528,
"eval_samples_per_second": 3.995,
"eval_steps_per_second": 0.044,
"eval_wer": 0.045576407506702415,
"step": 152000
},
{
"epoch": 15.705194005337713,
"grad_norm": 0.20325519144535065,
"learning_rate": 0.0003436535692244394,
"loss": 0.0396,
"step": 153000
},
{
"epoch": 15.705194005337713,
"eval_cer": 0.010811611374407584,
"eval_loss": 0.023403970524668694,
"eval_runtime": 22.2476,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 153000
},
{
"epoch": 15.807842332169985,
"grad_norm": 0.15232166647911072,
"learning_rate": 0.0003426249742851265,
"loss": 0.0394,
"step": 154000
},
{
"epoch": 15.807842332169985,
"eval_cer": 0.013625592417061612,
"eval_loss": 0.022816922515630722,
"eval_runtime": 22.7231,
"eval_samples_per_second": 3.961,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04736371760500447,
"step": 154000
},
{
"epoch": 15.910490659002258,
"grad_norm": 0.3241395056247711,
"learning_rate": 0.0003415963793458136,
"loss": 0.0392,
"step": 155000
},
{
"epoch": 15.910490659002258,
"eval_cer": 0.010811611374407584,
"eval_loss": 0.021291887387633324,
"eval_runtime": 22.2235,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 155000
},
{
"epoch": 16.01313898583453,
"grad_norm": 0.19988052546977997,
"learning_rate": 0.0003405677844065007,
"loss": 0.0393,
"step": 156000
},
{
"epoch": 16.01313898583453,
"eval_cer": 0.011404028436018957,
"eval_loss": 0.02226296253502369,
"eval_runtime": 22.2614,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 156000
},
{
"epoch": 16.115787312666804,
"grad_norm": 0.23728616535663605,
"learning_rate": 0.0003395391894671878,
"loss": 0.0382,
"step": 157000
},
{
"epoch": 16.115787312666804,
"eval_cer": 0.011255924170616114,
"eval_loss": 0.021734587848186493,
"eval_runtime": 22.1722,
"eval_samples_per_second": 4.059,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 157000
},
{
"epoch": 16.218435639499077,
"grad_norm": 0.21486635506153107,
"learning_rate": 0.00033851059452787494,
"loss": 0.0379,
"step": 158000
},
{
"epoch": 16.218435639499077,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.02133306674659252,
"eval_runtime": 22.2489,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 158000
},
{
"epoch": 16.321083966331347,
"grad_norm": 0.21918782591819763,
"learning_rate": 0.00033748199958856203,
"loss": 0.0382,
"step": 159000
},
{
"epoch": 16.321083966331347,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.022134315222501755,
"eval_runtime": 22.3537,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 159000
},
{
"epoch": 16.42373229316362,
"grad_norm": 0.1927264928817749,
"learning_rate": 0.0003364534046492491,
"loss": 0.0383,
"step": 160000
},
{
"epoch": 16.42373229316362,
"eval_cer": 0.0115521327014218,
"eval_loss": 0.022146208211779594,
"eval_runtime": 22.5984,
"eval_samples_per_second": 3.983,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04825737265415549,
"step": 160000
},
{
"epoch": 16.526380619995894,
"grad_norm": 0.19513466954231262,
"learning_rate": 0.0003354248097099362,
"loss": 0.0386,
"step": 161000
},
{
"epoch": 16.526380619995894,
"eval_cer": 0.011700236966824644,
"eval_loss": 0.021798642352223396,
"eval_runtime": 22.2348,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04825737265415549,
"step": 161000
},
{
"epoch": 16.629028946828168,
"grad_norm": 0.1991739124059677,
"learning_rate": 0.0003343962147706233,
"loss": 0.038,
"step": 162000
},
{
"epoch": 16.629028946828168,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.021404601633548737,
"eval_runtime": 22.5257,
"eval_samples_per_second": 3.995,
"eval_steps_per_second": 0.044,
"eval_wer": 0.045576407506702415,
"step": 162000
},
{
"epoch": 16.731677273660438,
"grad_norm": 0.19290116429328918,
"learning_rate": 0.0003333676198313104,
"loss": 0.0389,
"step": 163000
},
{
"epoch": 16.731677273660438,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.0213669091463089,
"eval_runtime": 22.5781,
"eval_samples_per_second": 3.986,
"eval_steps_per_second": 0.044,
"eval_wer": 0.041108132260947276,
"step": 163000
},
{
"epoch": 16.83432560049271,
"grad_norm": 0.29244861006736755,
"learning_rate": 0.0003323390248919975,
"loss": 0.0384,
"step": 164000
},
{
"epoch": 16.83432560049271,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.02153705060482025,
"eval_runtime": 22.335,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 164000
},
{
"epoch": 16.936973927324985,
"grad_norm": 0.17148034274578094,
"learning_rate": 0.00033131042995268465,
"loss": 0.0381,
"step": 165000
},
{
"epoch": 16.936973927324985,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.022320713847875595,
"eval_runtime": 22.0009,
"eval_samples_per_second": 4.091,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04289544235924933,
"step": 165000
},
{
"epoch": 17.03962225415726,
"grad_norm": 0.31796592473983765,
"learning_rate": 0.00033028183501337174,
"loss": 0.0384,
"step": 166000
},
{
"epoch": 17.03962225415726,
"eval_cer": 0.01110781990521327,
"eval_loss": 0.02184494584798813,
"eval_runtime": 22.2781,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 166000
},
{
"epoch": 17.14227058098953,
"grad_norm": 0.2634246051311493,
"learning_rate": 0.00032925324007405883,
"loss": 0.0371,
"step": 167000
},
{
"epoch": 17.14227058098953,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.022673843428492546,
"eval_runtime": 22.5449,
"eval_samples_per_second": 3.992,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 167000
},
{
"epoch": 17.244918907821802,
"grad_norm": 0.21225817501544952,
"learning_rate": 0.0003282246451347459,
"loss": 0.0372,
"step": 168000
},
{
"epoch": 17.244918907821802,
"eval_cer": 0.011848341232227487,
"eval_loss": 0.021213963627815247,
"eval_runtime": 22.3119,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04647006255585344,
"step": 168000
},
{
"epoch": 17.347567234654075,
"grad_norm": 0.30099403858184814,
"learning_rate": 0.000327196050195433,
"loss": 0.0375,
"step": 169000
},
{
"epoch": 17.347567234654075,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.021301671862602234,
"eval_runtime": 22.2137,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03932082216264522,
"step": 169000
},
{
"epoch": 17.45021556148635,
"grad_norm": 0.20359040796756744,
"learning_rate": 0.0003261674552561201,
"loss": 0.0378,
"step": 170000
},
{
"epoch": 17.45021556148635,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.02227012813091278,
"eval_runtime": 22.5554,
"eval_samples_per_second": 3.99,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04289544235924933,
"step": 170000
},
{
"epoch": 17.55286388831862,
"grad_norm": 0.225717231631279,
"learning_rate": 0.0003251388603168072,
"loss": 0.0381,
"step": 171000
},
{
"epoch": 17.55286388831862,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.02183985523879528,
"eval_runtime": 22.3674,
"eval_samples_per_second": 4.024,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04289544235924933,
"step": 171000
},
{
"epoch": 17.655512215150893,
"grad_norm": 0.23642343282699585,
"learning_rate": 0.00032411026537749435,
"loss": 0.038,
"step": 172000
},
{
"epoch": 17.655512215150893,
"eval_cer": 0.010071090047393365,
"eval_loss": 0.02163875661790371,
"eval_runtime": 22.3807,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0420017873100983,
"step": 172000
},
{
"epoch": 17.758160541983166,
"grad_norm": 0.206275075674057,
"learning_rate": 0.00032308167043818144,
"loss": 0.0381,
"step": 173000
},
{
"epoch": 17.758160541983166,
"eval_cer": 0.01110781990521327,
"eval_loss": 0.021833743900060654,
"eval_runtime": 22.1601,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 173000
},
{
"epoch": 17.86080886881544,
"grad_norm": 0.1906212568283081,
"learning_rate": 0.00032205307549886854,
"loss": 0.0376,
"step": 174000
},
{
"epoch": 17.86080886881544,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.0216918233782053,
"eval_runtime": 22.5419,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0420017873100983,
"step": 174000
},
{
"epoch": 17.96345719564771,
"grad_norm": 0.2309373915195465,
"learning_rate": 0.00032102448055955563,
"loss": 0.0379,
"step": 175000
},
{
"epoch": 17.96345719564771,
"eval_cer": 0.01229265402843602,
"eval_loss": 0.022404534742236137,
"eval_runtime": 22.4205,
"eval_samples_per_second": 4.014,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 175000
},
{
"epoch": 18.066105522479983,
"grad_norm": 0.26210764050483704,
"learning_rate": 0.0003199958856202427,
"loss": 0.037,
"step": 176000
},
{
"epoch": 18.066105522479983,
"eval_cer": 0.009626777251184835,
"eval_loss": 0.02186032012104988,
"eval_runtime": 22.3427,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0420017873100983,
"step": 176000
},
{
"epoch": 18.168753849312257,
"grad_norm": 0.18146245181560516,
"learning_rate": 0.0003189672906809298,
"loss": 0.0366,
"step": 177000
},
{
"epoch": 18.168753849312257,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.021789953112602234,
"eval_runtime": 22.2303,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 177000
},
{
"epoch": 18.27140217614453,
"grad_norm": 0.21234826743602753,
"learning_rate": 0.0003179386957416169,
"loss": 0.0364,
"step": 178000
},
{
"epoch": 18.27140217614453,
"eval_cer": 0.010071090047393365,
"eval_loss": 0.023085610941052437,
"eval_runtime": 22.5005,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0420017873100983,
"step": 178000
},
{
"epoch": 18.3740505029768,
"grad_norm": 0.20181190967559814,
"learning_rate": 0.00031691010080230406,
"loss": 0.037,
"step": 179000
},
{
"epoch": 18.3740505029768,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.022074325010180473,
"eval_runtime": 22.2124,
"eval_samples_per_second": 4.052,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0420017873100983,
"step": 179000
},
{
"epoch": 18.476698829809074,
"grad_norm": 0.20344142615795135,
"learning_rate": 0.00031588150586299115,
"loss": 0.0368,
"step": 180000
},
{
"epoch": 18.476698829809074,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.02133142203092575,
"eval_runtime": 22.4401,
"eval_samples_per_second": 4.011,
"eval_steps_per_second": 0.045,
"eval_wer": 0.041108132260947276,
"step": 180000
},
{
"epoch": 18.579347156641347,
"grad_norm": 0.26208797097206116,
"learning_rate": 0.00031485291092367824,
"loss": 0.0374,
"step": 181000
},
{
"epoch": 18.579347156641347,
"eval_cer": 0.010811611374407584,
"eval_loss": 0.022065425291657448,
"eval_runtime": 22.2276,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 181000
},
{
"epoch": 18.68199548347362,
"grad_norm": 0.21672701835632324,
"learning_rate": 0.00031382431598436534,
"loss": 0.0378,
"step": 182000
},
{
"epoch": 18.68199548347362,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.022190110757946968,
"eval_runtime": 22.4805,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.044682752457551385,
"step": 182000
},
{
"epoch": 18.78464381030589,
"grad_norm": 0.26024818420410156,
"learning_rate": 0.00031279572104505243,
"loss": 0.0373,
"step": 183000
},
{
"epoch": 18.78464381030589,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.0211643036454916,
"eval_runtime": 22.2696,
"eval_samples_per_second": 4.041,
"eval_steps_per_second": 0.045,
"eval_wer": 0.041108132260947276,
"step": 183000
},
{
"epoch": 18.887292137138164,
"grad_norm": 0.15596991777420044,
"learning_rate": 0.0003117671261057395,
"loss": 0.0374,
"step": 184000
},
{
"epoch": 18.887292137138164,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.02078518457710743,
"eval_runtime": 22.3696,
"eval_samples_per_second": 4.023,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0420017873100983,
"step": 184000
},
{
"epoch": 18.989940463970438,
"grad_norm": 0.21325981616973877,
"learning_rate": 0.0003107385311664266,
"loss": 0.0377,
"step": 185000
},
{
"epoch": 18.989940463970438,
"eval_cer": 0.011700236966824644,
"eval_loss": 0.021274788305163383,
"eval_runtime": 22.3681,
"eval_samples_per_second": 4.024,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 185000
},
{
"epoch": 19.09258879080271,
"grad_norm": 0.2881476581096649,
"learning_rate": 0.00030970993622711376,
"loss": 0.0362,
"step": 186000
},
{
"epoch": 19.09258879080271,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.020506886765360832,
"eval_runtime": 22.4918,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 186000
},
{
"epoch": 19.19523711763498,
"grad_norm": 0.2128625214099884,
"learning_rate": 0.00030868134128780086,
"loss": 0.036,
"step": 187000
},
{
"epoch": 19.19523711763498,
"eval_cer": 0.0115521327014218,
"eval_loss": 0.021428626030683517,
"eval_runtime": 22.4254,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04736371760500447,
"step": 187000
},
{
"epoch": 19.297885444467255,
"grad_norm": 0.20976155996322632,
"learning_rate": 0.000307652746348488,
"loss": 0.0367,
"step": 188000
},
{
"epoch": 19.297885444467255,
"eval_cer": 0.010959715639810427,
"eval_loss": 0.020786074921488762,
"eval_runtime": 22.3038,
"eval_samples_per_second": 4.035,
"eval_steps_per_second": 0.045,
"eval_wer": 0.045576407506702415,
"step": 188000
},
{
"epoch": 19.40053377129953,
"grad_norm": 0.21684007346630096,
"learning_rate": 0.0003066241514091751,
"loss": 0.0363,
"step": 189000
},
{
"epoch": 19.40053377129953,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.020662952214479446,
"eval_runtime": 22.499,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.041108132260947276,
"step": 189000
},
{
"epoch": 19.503182098131802,
"grad_norm": 0.4317739009857178,
"learning_rate": 0.0003055955564698622,
"loss": 0.0365,
"step": 190000
},
{
"epoch": 19.503182098131802,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.021092107519507408,
"eval_runtime": 22.4144,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04289544235924933,
"step": 190000
},
{
"epoch": 19.605830424964072,
"grad_norm": 0.23220385611057281,
"learning_rate": 0.0003045669615305493,
"loss": 0.0364,
"step": 191000
},
{
"epoch": 19.605830424964072,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.020856238901615143,
"eval_runtime": 22.4251,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 191000
},
{
"epoch": 19.708478751796346,
"grad_norm": 0.20682792365550995,
"learning_rate": 0.0003035383665912364,
"loss": 0.0369,
"step": 192000
},
{
"epoch": 19.708478751796346,
"eval_cer": 0.01066350710900474,
"eval_loss": 0.020980246365070343,
"eval_runtime": 22.3208,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 192000
},
{
"epoch": 19.81112707862862,
"grad_norm": 0.160901740193367,
"learning_rate": 0.0003025097716519235,
"loss": 0.0366,
"step": 193000
},
{
"epoch": 19.81112707862862,
"eval_cer": 0.009626777251184835,
"eval_loss": 0.02076118066906929,
"eval_runtime": 22.2895,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 193000
},
{
"epoch": 19.913775405460893,
"grad_norm": 0.25278541445732117,
"learning_rate": 0.0003014811767126106,
"loss": 0.0369,
"step": 194000
},
{
"epoch": 19.913775405460893,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.02101094461977482,
"eval_runtime": 22.1128,
"eval_samples_per_second": 4.07,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03932082216264522,
"step": 194000
},
{
"epoch": 20.016423732293163,
"grad_norm": 0.18868520855903625,
"learning_rate": 0.0003004525817732977,
"loss": 0.0365,
"step": 195000
},
{
"epoch": 20.016423732293163,
"eval_cer": 0.010071090047393365,
"eval_loss": 0.021312745288014412,
"eval_runtime": 22.2522,
"eval_samples_per_second": 4.045,
"eval_steps_per_second": 0.045,
"eval_wer": 0.041108132260947276,
"step": 195000
},
{
"epoch": 20.119072059125436,
"grad_norm": 0.16919797658920288,
"learning_rate": 0.0002994239868339848,
"loss": 0.0358,
"step": 196000
},
{
"epoch": 20.119072059125436,
"eval_cer": 0.009626777251184835,
"eval_loss": 0.020815536379814148,
"eval_runtime": 22.1691,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 196000
},
{
"epoch": 20.22172038595771,
"grad_norm": 0.2387053221464157,
"learning_rate": 0.0002983953918946719,
"loss": 0.0357,
"step": 197000
},
{
"epoch": 20.22172038595771,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.022103123366832733,
"eval_runtime": 22.0878,
"eval_samples_per_second": 4.075,
"eval_steps_per_second": 0.045,
"eval_wer": 0.041108132260947276,
"step": 197000
},
{
"epoch": 20.324368712789983,
"grad_norm": 0.1803978830575943,
"learning_rate": 0.000297366796955359,
"loss": 0.036,
"step": 198000
},
{
"epoch": 20.324368712789983,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.02122490108013153,
"eval_runtime": 22.4345,
"eval_samples_per_second": 4.012,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 198000
},
{
"epoch": 20.427017039622253,
"grad_norm": 0.30070099234580994,
"learning_rate": 0.0002963382020160461,
"loss": 0.0358,
"step": 199000
},
{
"epoch": 20.427017039622253,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.020686373114585876,
"eval_runtime": 22.3744,
"eval_samples_per_second": 4.022,
"eval_steps_per_second": 0.045,
"eval_wer": 0.041108132260947276,
"step": 199000
},
{
"epoch": 20.529665366454527,
"grad_norm": 0.20007756352424622,
"learning_rate": 0.00029530960707673323,
"loss": 0.0361,
"step": 200000
},
{
"epoch": 20.529665366454527,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.019611194729804993,
"eval_runtime": 22.3448,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 200000
},
{
"epoch": 20.6323136932868,
"grad_norm": 0.2129463404417038,
"learning_rate": 0.0002942810121374203,
"loss": 0.0363,
"step": 201000
},
{
"epoch": 20.6323136932868,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.021008532494306564,
"eval_runtime": 22.292,
"eval_samples_per_second": 4.037,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 201000
},
{
"epoch": 20.73496202011907,
"grad_norm": 0.16587744653224945,
"learning_rate": 0.0002932524171981074,
"loss": 0.0359,
"step": 202000
},
{
"epoch": 20.73496202011907,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.02170945331454277,
"eval_runtime": 22.2982,
"eval_samples_per_second": 4.036,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03932082216264522,
"step": 202000
},
{
"epoch": 20.837610346951344,
"grad_norm": 0.1517147719860077,
"learning_rate": 0.0002922238222587945,
"loss": 0.0362,
"step": 203000
},
{
"epoch": 20.837610346951344,
"eval_cer": 0.01110781990521327,
"eval_loss": 0.021431386470794678,
"eval_runtime": 22.317,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.045,
"eval_wer": 0.044682752457551385,
"step": 203000
},
{
"epoch": 20.940258673783617,
"grad_norm": 0.33937105536460876,
"learning_rate": 0.0002911952273194816,
"loss": 0.0359,
"step": 204000
},
{
"epoch": 20.940258673783617,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.01971680298447609,
"eval_runtime": 22.523,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 204000
},
{
"epoch": 21.04290700061589,
"grad_norm": 0.19971835613250732,
"learning_rate": 0.0002901666323801687,
"loss": 0.0357,
"step": 205000
},
{
"epoch": 21.04290700061589,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.020647110417485237,
"eval_runtime": 22.1889,
"eval_samples_per_second": 4.056,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0420017873100983,
"step": 205000
},
{
"epoch": 21.14555532744816,
"grad_norm": 0.2110970914363861,
"learning_rate": 0.0002891380374408558,
"loss": 0.0346,
"step": 206000
},
{
"epoch": 21.14555532744816,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.01989123784005642,
"eval_runtime": 22.7934,
"eval_samples_per_second": 3.949,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 206000
},
{
"epoch": 21.248203654280434,
"grad_norm": 0.20562046766281128,
"learning_rate": 0.00028810944250154294,
"loss": 0.0349,
"step": 207000
},
{
"epoch": 21.248203654280434,
"eval_cer": 0.010515402843601895,
"eval_loss": 0.020657481625676155,
"eval_runtime": 22.7975,
"eval_samples_per_second": 3.948,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04289544235924933,
"step": 207000
},
{
"epoch": 21.350851981112708,
"grad_norm": 0.16318105161190033,
"learning_rate": 0.00028708084756223003,
"loss": 0.0353,
"step": 208000
},
{
"epoch": 21.350851981112708,
"eval_cer": 0.010367298578199052,
"eval_loss": 0.01860020123422146,
"eval_runtime": 22.3138,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.045,
"eval_wer": 0.04289544235924933,
"step": 208000
},
{
"epoch": 21.45350030794498,
"grad_norm": 0.2521456480026245,
"learning_rate": 0.0002860522526229171,
"loss": 0.0356,
"step": 209000
},
{
"epoch": 21.45350030794498,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.01932937651872635,
"eval_runtime": 22.5041,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 209000
},
{
"epoch": 21.55614863477725,
"grad_norm": 0.15729331970214844,
"learning_rate": 0.0002850236576836042,
"loss": 0.0356,
"step": 210000
},
{
"epoch": 21.55614863477725,
"eval_cer": 0.01066350710900474,
"eval_loss": 0.01956385001540184,
"eval_runtime": 22.4886,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.044682752457551385,
"step": 210000
},
{
"epoch": 21.658796961609525,
"grad_norm": 0.3368454575538635,
"learning_rate": 0.0002839950627442913,
"loss": 0.0355,
"step": 211000
},
{
"epoch": 21.658796961609525,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.017849326133728027,
"eval_runtime": 22.2354,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03753351206434316,
"step": 211000
},
{
"epoch": 21.7614452884418,
"grad_norm": 0.19548599421977997,
"learning_rate": 0.0002829664678049784,
"loss": 0.0356,
"step": 212000
},
{
"epoch": 21.7614452884418,
"eval_cer": 0.010071090047393365,
"eval_loss": 0.018993813544511795,
"eval_runtime": 22.2405,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 212000
},
{
"epoch": 21.864093615274072,
"grad_norm": 0.300447553396225,
"learning_rate": 0.0002819378728656655,
"loss": 0.0355,
"step": 213000
},
{
"epoch": 21.864093615274072,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.018936272710561752,
"eval_runtime": 22.3288,
"eval_samples_per_second": 4.031,
"eval_steps_per_second": 0.045,
"eval_wer": 0.041108132260947276,
"step": 213000
},
{
"epoch": 21.966741942106342,
"grad_norm": 0.2299223691225052,
"learning_rate": 0.00028090927792635264,
"loss": 0.0357,
"step": 214000
},
{
"epoch": 21.966741942106342,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.017861895263195038,
"eval_runtime": 22.4153,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 214000
},
{
"epoch": 22.069390268938616,
"grad_norm": 0.22763746976852417,
"learning_rate": 0.00027988068298703974,
"loss": 0.035,
"step": 215000
},
{
"epoch": 22.069390268938616,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.018531307578086853,
"eval_runtime": 22.5994,
"eval_samples_per_second": 3.982,
"eval_steps_per_second": 0.044,
"eval_wer": 0.04289544235924933,
"step": 215000
},
{
"epoch": 22.17203859577089,
"grad_norm": 0.17821846902370453,
"learning_rate": 0.00027885208804772683,
"loss": 0.0343,
"step": 216000
},
{
"epoch": 22.17203859577089,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.01801004819571972,
"eval_runtime": 22.6174,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 216000
},
{
"epoch": 22.274686922603163,
"grad_norm": 0.20817448198795319,
"learning_rate": 0.0002778234931084139,
"loss": 0.0353,
"step": 217000
},
{
"epoch": 22.274686922603163,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.01922653615474701,
"eval_runtime": 22.3133,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.045,
"eval_wer": 0.0420017873100983,
"step": 217000
},
{
"epoch": 22.377335249435433,
"grad_norm": 0.23358240723609924,
"learning_rate": 0.000276794898169101,
"loss": 0.0347,
"step": 218000
},
{
"epoch": 22.377335249435433,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.019061286002397537,
"eval_runtime": 22.516,
"eval_samples_per_second": 3.997,
"eval_steps_per_second": 0.044,
"eval_wer": 0.041108132260947276,
"step": 218000
},
{
"epoch": 22.479983576267706,
"grad_norm": 0.1586250215768814,
"learning_rate": 0.0002757663032297881,
"loss": 0.0344,
"step": 219000
},
{
"epoch": 22.479983576267706,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.018482740968465805,
"eval_runtime": 22.7441,
"eval_samples_per_second": 3.957,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 219000
},
{
"epoch": 22.58263190309998,
"grad_norm": 0.18222519755363464,
"learning_rate": 0.0002747377082904752,
"loss": 0.0355,
"step": 220000
},
{
"epoch": 22.58263190309998,
"eval_cer": 0.010071090047393365,
"eval_loss": 0.01949753239750862,
"eval_runtime": 22.5921,
"eval_samples_per_second": 3.984,
"eval_steps_per_second": 0.044,
"eval_wer": 0.0420017873100983,
"step": 220000
},
{
"epoch": 22.685280229932253,
"grad_norm": 0.24508023262023926,
"learning_rate": 0.00027370911335116235,
"loss": 0.0351,
"step": 221000
},
{
"epoch": 22.685280229932253,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.019236860796809196,
"eval_runtime": 22.1523,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 221000
},
{
"epoch": 22.787928556764523,
"grad_norm": 0.20518304407596588,
"learning_rate": 0.00027268051841184944,
"loss": 0.0349,
"step": 222000
},
{
"epoch": 22.787928556764523,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.01924031414091587,
"eval_runtime": 22.4441,
"eval_samples_per_second": 4.01,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 222000
},
{
"epoch": 22.890576883596797,
"grad_norm": 0.34271785616874695,
"learning_rate": 0.00027165192347253654,
"loss": 0.0354,
"step": 223000
},
{
"epoch": 22.890576883596797,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.018572239205241203,
"eval_runtime": 22.2344,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03753351206434316,
"step": 223000
},
{
"epoch": 22.99322521042907,
"grad_norm": 0.23716846108436584,
"learning_rate": 0.00027062332853322363,
"loss": 0.0351,
"step": 224000
},
{
"epoch": 22.99322521042907,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.018431993201375008,
"eval_runtime": 22.1175,
"eval_samples_per_second": 4.069,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03753351206434316,
"step": 224000
},
{
"epoch": 23.095873537261344,
"grad_norm": 0.20996840298175812,
"learning_rate": 0.0002695947335939107,
"loss": 0.0336,
"step": 225000
},
{
"epoch": 23.095873537261344,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.01876632496714592,
"eval_runtime": 22.1146,
"eval_samples_per_second": 4.07,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03753351206434316,
"step": 225000
},
{
"epoch": 23.198521864093614,
"grad_norm": 0.24316902458667755,
"learning_rate": 0.0002685661386545978,
"loss": 0.0342,
"step": 226000
},
{
"epoch": 23.198521864093614,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.018810244277119637,
"eval_runtime": 22.534,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 226000
},
{
"epoch": 23.301170190925887,
"grad_norm": 0.21960946917533875,
"learning_rate": 0.00026753754371528496,
"loss": 0.0341,
"step": 227000
},
{
"epoch": 23.301170190925887,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.01906524784862995,
"eval_runtime": 22.5098,
"eval_samples_per_second": 3.998,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 227000
},
{
"epoch": 23.40381851775816,
"grad_norm": 0.24521832168102264,
"learning_rate": 0.00026650894877597206,
"loss": 0.0346,
"step": 228000
},
{
"epoch": 23.40381851775816,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.017732921987771988,
"eval_runtime": 22.3346,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 228000
},
{
"epoch": 23.506466844590435,
"grad_norm": 0.20621488988399506,
"learning_rate": 0.00026548035383665915,
"loss": 0.0341,
"step": 229000
},
{
"epoch": 23.506466844590435,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.018322736024856567,
"eval_runtime": 22.307,
"eval_samples_per_second": 4.035,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 229000
},
{
"epoch": 23.609115171422705,
"grad_norm": 0.22831113636493683,
"learning_rate": 0.00026445175889734624,
"loss": 0.0348,
"step": 230000
},
{
"epoch": 23.609115171422705,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.018174562603235245,
"eval_runtime": 22.2887,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 230000
},
{
"epoch": 23.711763498254978,
"grad_norm": 0.26975417137145996,
"learning_rate": 0.00026342316395803334,
"loss": 0.0343,
"step": 231000
},
{
"epoch": 23.711763498254978,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.017916005104780197,
"eval_runtime": 22.3158,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 231000
},
{
"epoch": 23.81441182508725,
"grad_norm": 0.18927611410617828,
"learning_rate": 0.00026239456901872043,
"loss": 0.0347,
"step": 232000
},
{
"epoch": 23.81441182508725,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.018648013472557068,
"eval_runtime": 22.3793,
"eval_samples_per_second": 4.022,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 232000
},
{
"epoch": 23.917060151919525,
"grad_norm": 0.3485555350780487,
"learning_rate": 0.0002613659740794075,
"loss": 0.0349,
"step": 233000
},
{
"epoch": 23.917060151919525,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.018734946846961975,
"eval_runtime": 22.4017,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 233000
},
{
"epoch": 24.019708478751795,
"grad_norm": 0.19541509449481964,
"learning_rate": 0.00026033737914009467,
"loss": 0.0346,
"step": 234000
},
{
"epoch": 24.019708478751795,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.01827438361942768,
"eval_runtime": 22.4976,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 234000
},
{
"epoch": 24.12235680558407,
"grad_norm": 0.15688838064670563,
"learning_rate": 0.00025930878420078176,
"loss": 0.0335,
"step": 235000
},
{
"epoch": 24.12235680558407,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.01861654222011566,
"eval_runtime": 22.4918,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 235000
},
{
"epoch": 24.225005132416342,
"grad_norm": 0.15766377747058868,
"learning_rate": 0.00025828018926146886,
"loss": 0.0334,
"step": 236000
},
{
"epoch": 24.225005132416342,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.017844926565885544,
"eval_runtime": 22.542,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 236000
},
{
"epoch": 24.327653459248616,
"grad_norm": 0.27199745178222656,
"learning_rate": 0.00025725159432215595,
"loss": 0.0335,
"step": 237000
},
{
"epoch": 24.327653459248616,
"eval_cer": 0.010219194312796208,
"eval_loss": 0.018539218232035637,
"eval_runtime": 22.1807,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.043789097408400354,
"step": 237000
},
{
"epoch": 24.430301786080886,
"grad_norm": 0.2251148819923401,
"learning_rate": 0.00025622299938284304,
"loss": 0.0339,
"step": 238000
},
{
"epoch": 24.430301786080886,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.018467124551534653,
"eval_runtime": 22.5004,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 238000
},
{
"epoch": 24.53295011291316,
"grad_norm": 0.1856725960969925,
"learning_rate": 0.00025519440444353014,
"loss": 0.0343,
"step": 239000
},
{
"epoch": 24.53295011291316,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.01805044710636139,
"eval_runtime": 22.2875,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.040214477211796246,
"step": 239000
},
{
"epoch": 24.635598439745433,
"grad_norm": 0.23337046802043915,
"learning_rate": 0.00025416580950421723,
"loss": 0.0341,
"step": 240000
},
{
"epoch": 24.635598439745433,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.018100356683135033,
"eval_runtime": 22.3689,
"eval_samples_per_second": 4.023,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03932082216264522,
"step": 240000
},
{
"epoch": 24.738246766577706,
"grad_norm": 0.3009665012359619,
"learning_rate": 0.0002531372145649044,
"loss": 0.0342,
"step": 241000
},
{
"epoch": 24.738246766577706,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.017376452684402466,
"eval_runtime": 22.3829,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 241000
},
{
"epoch": 24.840895093409976,
"grad_norm": 0.20083321630954742,
"learning_rate": 0.00025210861962559147,
"loss": 0.0341,
"step": 242000
},
{
"epoch": 24.840895093409976,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.018101993948221207,
"eval_runtime": 22.6389,
"eval_samples_per_second": 3.975,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 242000
},
{
"epoch": 24.94354342024225,
"grad_norm": 0.2611863315105438,
"learning_rate": 0.00025108002468627856,
"loss": 0.034,
"step": 243000
},
{
"epoch": 24.94354342024225,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.01786319725215435,
"eval_runtime": 22.5343,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 243000
},
{
"epoch": 25.046191747074523,
"grad_norm": 0.22002221643924713,
"learning_rate": 0.00025005142974696566,
"loss": 0.0337,
"step": 244000
},
{
"epoch": 25.046191747074523,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.01813172735273838,
"eval_runtime": 22.7449,
"eval_samples_per_second": 3.957,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 244000
},
{
"epoch": 25.148840073906797,
"grad_norm": 0.22292836010456085,
"learning_rate": 0.00024902283480765275,
"loss": 0.0331,
"step": 245000
},
{
"epoch": 25.148840073906797,
"eval_cer": 0.010071090047393365,
"eval_loss": 0.018383100628852844,
"eval_runtime": 22.3583,
"eval_samples_per_second": 4.025,
"eval_steps_per_second": 0.045,
"eval_wer": 0.041108132260947276,
"step": 245000
},
{
"epoch": 25.251488400739067,
"grad_norm": 0.22255383431911469,
"learning_rate": 0.00024799423986833984,
"loss": 0.0332,
"step": 246000
},
{
"epoch": 25.251488400739067,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.018330469727516174,
"eval_runtime": 22.5685,
"eval_samples_per_second": 3.988,
"eval_steps_per_second": 0.044,
"eval_wer": 0.041108132260947276,
"step": 246000
},
{
"epoch": 25.35413672757134,
"grad_norm": 0.16578556597232819,
"learning_rate": 0.00024696564492902694,
"loss": 0.0335,
"step": 247000
},
{
"epoch": 25.35413672757134,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.017710883170366287,
"eval_runtime": 22.2874,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 247000
},
{
"epoch": 25.456785054403614,
"grad_norm": 0.24286945164203644,
"learning_rate": 0.0002459370499897141,
"loss": 0.0336,
"step": 248000
},
{
"epoch": 25.456785054403614,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.017551274970173836,
"eval_runtime": 22.5028,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 248000
},
{
"epoch": 25.559433381235884,
"grad_norm": 0.2211551070213318,
"learning_rate": 0.0002449084550504012,
"loss": 0.0334,
"step": 249000
},
{
"epoch": 25.559433381235884,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.017883356660604477,
"eval_runtime": 22.5913,
"eval_samples_per_second": 3.984,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 249000
},
{
"epoch": 25.662081708068158,
"grad_norm": 0.22908490896224976,
"learning_rate": 0.00024387986011108827,
"loss": 0.0336,
"step": 250000
},
{
"epoch": 25.662081708068158,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.01878712698817253,
"eval_runtime": 22.5067,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 250000
},
{
"epoch": 25.76473003490043,
"grad_norm": 0.23008745908737183,
"learning_rate": 0.00024285126517177536,
"loss": 0.0337,
"step": 251000
},
{
"epoch": 25.76473003490043,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.018499523401260376,
"eval_runtime": 22.5329,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 251000
},
{
"epoch": 25.867378361732705,
"grad_norm": 0.197307750582695,
"learning_rate": 0.00024182267023246246,
"loss": 0.0337,
"step": 252000
},
{
"epoch": 25.867378361732705,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01780012436211109,
"eval_runtime": 22.5224,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03485254691689008,
"step": 252000
},
{
"epoch": 25.970026688564978,
"grad_norm": 0.24368754029273987,
"learning_rate": 0.00024079407529314955,
"loss": 0.0338,
"step": 253000
},
{
"epoch": 25.970026688564978,
"eval_cer": 0.009626777251184835,
"eval_loss": 0.017333028838038445,
"eval_runtime": 22.4863,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 253000
},
{
"epoch": 26.072675015397248,
"grad_norm": 0.17760606110095978,
"learning_rate": 0.00023976548035383667,
"loss": 0.0328,
"step": 254000
},
{
"epoch": 26.072675015397248,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.017533306032419205,
"eval_runtime": 22.6165,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 254000
},
{
"epoch": 26.17532334222952,
"grad_norm": 0.24285702407360077,
"learning_rate": 0.00023873688541452376,
"loss": 0.0325,
"step": 255000
},
{
"epoch": 26.17532334222952,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.01821918785572052,
"eval_runtime": 22.4982,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 255000
},
{
"epoch": 26.277971669061795,
"grad_norm": 0.216440811753273,
"learning_rate": 0.00023770829047521086,
"loss": 0.0328,
"step": 256000
},
{
"epoch": 26.277971669061795,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.01876477338373661,
"eval_runtime": 22.6643,
"eval_samples_per_second": 3.971,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 256000
},
{
"epoch": 26.380619995894065,
"grad_norm": 0.18202808499336243,
"learning_rate": 0.00023667969553589798,
"loss": 0.0327,
"step": 257000
},
{
"epoch": 26.380619995894065,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.017915373668074608,
"eval_runtime": 22.5776,
"eval_samples_per_second": 3.986,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 257000
},
{
"epoch": 26.48326832272634,
"grad_norm": 0.183961883187294,
"learning_rate": 0.00023565110059658507,
"loss": 0.0331,
"step": 258000
},
{
"epoch": 26.48326832272634,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.018149225041270256,
"eval_runtime": 22.4894,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 258000
},
{
"epoch": 26.585916649558612,
"grad_norm": 0.2554219365119934,
"learning_rate": 0.00023462250565727216,
"loss": 0.033,
"step": 259000
},
{
"epoch": 26.585916649558612,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.017295319586992264,
"eval_runtime": 22.4744,
"eval_samples_per_second": 4.005,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 259000
},
{
"epoch": 26.688564976390886,
"grad_norm": 0.3923017978668213,
"learning_rate": 0.00023359391071795926,
"loss": 0.0334,
"step": 260000
},
{
"epoch": 26.688564976390886,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.018423665314912796,
"eval_runtime": 22.6112,
"eval_samples_per_second": 3.98,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 260000
},
{
"epoch": 26.791213303223156,
"grad_norm": 0.413510262966156,
"learning_rate": 0.00023256531577864638,
"loss": 0.0334,
"step": 261000
},
{
"epoch": 26.791213303223156,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.017927566543221474,
"eval_runtime": 22.5258,
"eval_samples_per_second": 3.995,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 261000
},
{
"epoch": 26.89386163005543,
"grad_norm": 0.25007760524749756,
"learning_rate": 0.00023153672083933347,
"loss": 0.0333,
"step": 262000
},
{
"epoch": 26.89386163005543,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.017284687608480453,
"eval_runtime": 22.5825,
"eval_samples_per_second": 3.985,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 262000
},
{
"epoch": 26.996509956887703,
"grad_norm": 0.3856132924556732,
"learning_rate": 0.00023050812590002056,
"loss": 0.0336,
"step": 263000
},
{
"epoch": 26.996509956887703,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.017093736678361893,
"eval_runtime": 22.7688,
"eval_samples_per_second": 3.953,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 263000
},
{
"epoch": 27.099158283719976,
"grad_norm": 0.3299943208694458,
"learning_rate": 0.00022947953096070768,
"loss": 0.0321,
"step": 264000
},
{
"epoch": 27.099158283719976,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.017240121960639954,
"eval_runtime": 22.5916,
"eval_samples_per_second": 3.984,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 264000
},
{
"epoch": 27.201806610552246,
"grad_norm": 0.1730368584394455,
"learning_rate": 0.00022845093602139478,
"loss": 0.0327,
"step": 265000
},
{
"epoch": 27.201806610552246,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.01783335767686367,
"eval_runtime": 22.2378,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 265000
},
{
"epoch": 27.30445493738452,
"grad_norm": 0.3776761293411255,
"learning_rate": 0.00022742234108208187,
"loss": 0.0326,
"step": 266000
},
{
"epoch": 27.30445493738452,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.017694596201181412,
"eval_runtime": 22.5643,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.040214477211796246,
"step": 266000
},
{
"epoch": 27.407103264216794,
"grad_norm": 0.17738159000873566,
"learning_rate": 0.00022639374614276896,
"loss": 0.0324,
"step": 267000
},
{
"epoch": 27.407103264216794,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.017476912587881088,
"eval_runtime": 22.7303,
"eval_samples_per_second": 3.959,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 267000
},
{
"epoch": 27.509751591049067,
"grad_norm": 0.2550307512283325,
"learning_rate": 0.00022536515120345608,
"loss": 0.0325,
"step": 268000
},
{
"epoch": 27.509751591049067,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.01743141934275627,
"eval_runtime": 22.7564,
"eval_samples_per_second": 3.955,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 268000
},
{
"epoch": 27.612399917881337,
"grad_norm": 0.16253231465816498,
"learning_rate": 0.00022433655626414317,
"loss": 0.0327,
"step": 269000
},
{
"epoch": 27.612399917881337,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.017301015555858612,
"eval_runtime": 23.1263,
"eval_samples_per_second": 3.892,
"eval_steps_per_second": 0.043,
"eval_wer": 0.03663985701519214,
"step": 269000
},
{
"epoch": 27.71504824471361,
"grad_norm": 0.15195374190807343,
"learning_rate": 0.00022330796132483027,
"loss": 0.0322,
"step": 270000
},
{
"epoch": 27.71504824471361,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.017138667404651642,
"eval_runtime": 23.3844,
"eval_samples_per_second": 3.849,
"eval_steps_per_second": 0.043,
"eval_wer": 0.03395889186773905,
"step": 270000
},
{
"epoch": 27.817696571545884,
"grad_norm": 0.2660813331604004,
"learning_rate": 0.0002222793663855174,
"loss": 0.0332,
"step": 271000
},
{
"epoch": 27.817696571545884,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.016922015696763992,
"eval_runtime": 22.6918,
"eval_samples_per_second": 3.966,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03485254691689008,
"step": 271000
},
{
"epoch": 27.920344898378158,
"grad_norm": 0.20602907240390778,
"learning_rate": 0.00022125077144620448,
"loss": 0.0328,
"step": 272000
},
{
"epoch": 27.920344898378158,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.016544727608561516,
"eval_runtime": 22.5504,
"eval_samples_per_second": 3.991,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 272000
},
{
"epoch": 28.022993225210428,
"grad_norm": 0.22045257687568665,
"learning_rate": 0.00022022217650689157,
"loss": 0.0324,
"step": 273000
},
{
"epoch": 28.022993225210428,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.016568990424275398,
"eval_runtime": 22.6458,
"eval_samples_per_second": 3.974,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 273000
},
{
"epoch": 28.1256415520427,
"grad_norm": 0.18806225061416626,
"learning_rate": 0.0002191935815675787,
"loss": 0.0315,
"step": 274000
},
{
"epoch": 28.1256415520427,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.016486881300807,
"eval_runtime": 22.6614,
"eval_samples_per_second": 3.972,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03485254691689008,
"step": 274000
},
{
"epoch": 28.228289878874975,
"grad_norm": 0.1898849457502365,
"learning_rate": 0.0002181649866282658,
"loss": 0.0319,
"step": 275000
},
{
"epoch": 28.228289878874975,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.016169264912605286,
"eval_runtime": 22.5647,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 275000
},
{
"epoch": 28.33093820570725,
"grad_norm": 0.27770882844924927,
"learning_rate": 0.00021713639168895288,
"loss": 0.0323,
"step": 276000
},
{
"epoch": 28.33093820570725,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01689663529396057,
"eval_runtime": 22.5984,
"eval_samples_per_second": 3.983,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03485254691689008,
"step": 276000
},
{
"epoch": 28.43358653253952,
"grad_norm": 0.23435185849666595,
"learning_rate": 0.00021610779674963997,
"loss": 0.0322,
"step": 277000
},
{
"epoch": 28.43358653253952,
"eval_cer": 0.009922985781990521,
"eval_loss": 0.016887083649635315,
"eval_runtime": 23.1898,
"eval_samples_per_second": 3.881,
"eval_steps_per_second": 0.043,
"eval_wer": 0.03932082216264522,
"step": 277000
},
{
"epoch": 28.536234859371792,
"grad_norm": 0.19802525639533997,
"learning_rate": 0.0002150792018103271,
"loss": 0.0319,
"step": 278000
},
{
"epoch": 28.536234859371792,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.016642894595861435,
"eval_runtime": 22.9038,
"eval_samples_per_second": 3.929,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 278000
},
{
"epoch": 28.638883186204065,
"grad_norm": 0.33312317728996277,
"learning_rate": 0.0002140506068710142,
"loss": 0.0324,
"step": 279000
},
{
"epoch": 28.638883186204065,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.016943588852882385,
"eval_runtime": 22.6215,
"eval_samples_per_second": 3.979,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 279000
},
{
"epoch": 28.74153151303634,
"grad_norm": 0.19455067813396454,
"learning_rate": 0.00021302201193170128,
"loss": 0.0326,
"step": 280000
},
{
"epoch": 28.74153151303634,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.016804693266749382,
"eval_runtime": 22.602,
"eval_samples_per_second": 3.982,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 280000
},
{
"epoch": 28.84417983986861,
"grad_norm": 0.14672687649726868,
"learning_rate": 0.0002119934169923884,
"loss": 0.0328,
"step": 281000
},
{
"epoch": 28.84417983986861,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.016962364315986633,
"eval_runtime": 22.456,
"eval_samples_per_second": 4.008,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 281000
},
{
"epoch": 28.946828166700882,
"grad_norm": 0.3485030233860016,
"learning_rate": 0.00021096482205307552,
"loss": 0.0322,
"step": 282000
},
{
"epoch": 28.946828166700882,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.017243940383195877,
"eval_runtime": 22.6149,
"eval_samples_per_second": 3.98,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03842716711349419,
"step": 282000
},
{
"epoch": 29.049476493533156,
"grad_norm": 0.20129866898059845,
"learning_rate": 0.00020993622711376261,
"loss": 0.0317,
"step": 283000
},
{
"epoch": 29.049476493533156,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.01694132201373577,
"eval_runtime": 22.5131,
"eval_samples_per_second": 3.998,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 283000
},
{
"epoch": 29.15212482036543,
"grad_norm": 0.32001006603240967,
"learning_rate": 0.0002089076321744497,
"loss": 0.0315,
"step": 284000
},
{
"epoch": 29.15212482036543,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.01657693088054657,
"eval_runtime": 22.5678,
"eval_samples_per_second": 3.988,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 284000
},
{
"epoch": 29.2547731471977,
"grad_norm": 0.21219150722026825,
"learning_rate": 0.00020787903723513683,
"loss": 0.0315,
"step": 285000
},
{
"epoch": 29.2547731471977,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.016005953773856163,
"eval_runtime": 22.3068,
"eval_samples_per_second": 4.035,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 285000
},
{
"epoch": 29.357421474029973,
"grad_norm": 0.20887607336044312,
"learning_rate": 0.00020685044229582392,
"loss": 0.0319,
"step": 286000
},
{
"epoch": 29.357421474029973,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.016784099861979485,
"eval_runtime": 22.3044,
"eval_samples_per_second": 4.035,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 286000
},
{
"epoch": 29.460069800862247,
"grad_norm": 0.28705254197120667,
"learning_rate": 0.00020582184735651101,
"loss": 0.0314,
"step": 287000
},
{
"epoch": 29.460069800862247,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.016876235604286194,
"eval_runtime": 22.2793,
"eval_samples_per_second": 4.04,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 287000
},
{
"epoch": 29.56271812769452,
"grad_norm": 0.323476642370224,
"learning_rate": 0.00020479325241719813,
"loss": 0.032,
"step": 288000
},
{
"epoch": 29.56271812769452,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.016845477744936943,
"eval_runtime": 22.1977,
"eval_samples_per_second": 4.054,
"eval_steps_per_second": 0.045,
"eval_wer": 0.032171581769437,
"step": 288000
},
{
"epoch": 29.66536645452679,
"grad_norm": 0.1829458326101303,
"learning_rate": 0.00020376465747788523,
"loss": 0.0321,
"step": 289000
},
{
"epoch": 29.66536645452679,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.015846768394112587,
"eval_runtime": 22.1026,
"eval_samples_per_second": 4.072,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 289000
},
{
"epoch": 29.768014781359064,
"grad_norm": 0.25482961535453796,
"learning_rate": 0.00020273606253857232,
"loss": 0.032,
"step": 290000
},
{
"epoch": 29.768014781359064,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.015906278043985367,
"eval_runtime": 22.3867,
"eval_samples_per_second": 4.02,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 290000
},
{
"epoch": 29.870663108191337,
"grad_norm": 0.19813385605812073,
"learning_rate": 0.00020170746759925941,
"loss": 0.0319,
"step": 291000
},
{
"epoch": 29.870663108191337,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.016582278534770012,
"eval_runtime": 22.6644,
"eval_samples_per_second": 3.971,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 291000
},
{
"epoch": 29.97331143502361,
"grad_norm": 0.23543916642665863,
"learning_rate": 0.00020067887265994653,
"loss": 0.0321,
"step": 292000
},
{
"epoch": 29.97331143502361,
"eval_cer": 0.009774881516587678,
"eval_loss": 0.01627834513783455,
"eval_runtime": 22.7565,
"eval_samples_per_second": 3.955,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 292000
},
{
"epoch": 30.07595976185588,
"grad_norm": 0.20127102732658386,
"learning_rate": 0.00019965027772063363,
"loss": 0.0309,
"step": 293000
},
{
"epoch": 30.07595976185588,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.016053717583417892,
"eval_runtime": 22.4512,
"eval_samples_per_second": 4.009,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 293000
},
{
"epoch": 30.178608088688154,
"grad_norm": 0.19356395304203033,
"learning_rate": 0.00019862168278132072,
"loss": 0.031,
"step": 294000
},
{
"epoch": 30.178608088688154,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.01641259714961052,
"eval_runtime": 22.3968,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03753351206434316,
"step": 294000
},
{
"epoch": 30.281256415520428,
"grad_norm": 0.3252677619457245,
"learning_rate": 0.00019759308784200784,
"loss": 0.0314,
"step": 295000
},
{
"epoch": 30.281256415520428,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.015991076827049255,
"eval_runtime": 22.4292,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 295000
},
{
"epoch": 30.3839047423527,
"grad_norm": 0.1807209700345993,
"learning_rate": 0.00019656449290269493,
"loss": 0.0312,
"step": 296000
},
{
"epoch": 30.3839047423527,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.015960650518536568,
"eval_runtime": 22.2631,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 296000
},
{
"epoch": 30.48655306918497,
"grad_norm": 0.2719903588294983,
"learning_rate": 0.00019553589796338203,
"loss": 0.0312,
"step": 297000
},
{
"epoch": 30.48655306918497,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.015721548348665237,
"eval_runtime": 22.1868,
"eval_samples_per_second": 4.056,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 297000
},
{
"epoch": 30.589201396017245,
"grad_norm": 0.32360509037971497,
"learning_rate": 0.00019450730302406912,
"loss": 0.0314,
"step": 298000
},
{
"epoch": 30.589201396017245,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01612556166946888,
"eval_runtime": 22.345,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 298000
},
{
"epoch": 30.69184972284952,
"grad_norm": 0.28737571835517883,
"learning_rate": 0.00019347870808475624,
"loss": 0.0317,
"step": 299000
},
{
"epoch": 30.69184972284952,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.016010984778404236,
"eval_runtime": 22.4982,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 299000
},
{
"epoch": 30.794498049681792,
"grad_norm": 0.16976721584796906,
"learning_rate": 0.00019245011314544333,
"loss": 0.0315,
"step": 300000
},
{
"epoch": 30.794498049681792,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.01699880138039589,
"eval_runtime": 22.4468,
"eval_samples_per_second": 4.009,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 300000
},
{
"epoch": 30.897146376514062,
"grad_norm": 0.2326597273349762,
"learning_rate": 0.00019142151820613043,
"loss": 0.0315,
"step": 301000
},
{
"epoch": 30.897146376514062,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.016296546906232834,
"eval_runtime": 22.4046,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03842716711349419,
"step": 301000
},
{
"epoch": 30.999794703346335,
"grad_norm": 0.4060353934764862,
"learning_rate": 0.00019039292326681755,
"loss": 0.0318,
"step": 302000
},
{
"epoch": 30.999794703346335,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.01701035536825657,
"eval_runtime": 22.4393,
"eval_samples_per_second": 4.011,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 302000
},
{
"epoch": 31.10244303017861,
"grad_norm": 0.19074688851833344,
"learning_rate": 0.00018936432832750464,
"loss": 0.0304,
"step": 303000
},
{
"epoch": 31.10244303017861,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.016249870881438255,
"eval_runtime": 22.6009,
"eval_samples_per_second": 3.982,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 303000
},
{
"epoch": 31.20509135701088,
"grad_norm": 0.1725562959909439,
"learning_rate": 0.00018833573338819173,
"loss": 0.0309,
"step": 304000
},
{
"epoch": 31.20509135701088,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.01692904904484749,
"eval_runtime": 22.3452,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 304000
},
{
"epoch": 31.307739683843153,
"grad_norm": 0.18745183944702148,
"learning_rate": 0.00018730713844887883,
"loss": 0.0308,
"step": 305000
},
{
"epoch": 31.307739683843153,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.01693115197122097,
"eval_runtime": 22.313,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 305000
},
{
"epoch": 31.410388010675426,
"grad_norm": 0.2364443838596344,
"learning_rate": 0.00018627854350956595,
"loss": 0.0311,
"step": 306000
},
{
"epoch": 31.410388010675426,
"eval_cer": 0.009626777251184835,
"eval_loss": 0.016600091010332108,
"eval_runtime": 22.2551,
"eval_samples_per_second": 4.044,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03932082216264522,
"step": 306000
},
{
"epoch": 31.5130363375077,
"grad_norm": 0.19486719369888306,
"learning_rate": 0.00018524994857025304,
"loss": 0.0309,
"step": 307000
},
{
"epoch": 31.5130363375077,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.01619219221174717,
"eval_runtime": 22.3592,
"eval_samples_per_second": 4.025,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 307000
},
{
"epoch": 31.61568466433997,
"grad_norm": 0.20491057634353638,
"learning_rate": 0.00018422135363094013,
"loss": 0.0308,
"step": 308000
},
{
"epoch": 31.61568466433997,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.016473235562443733,
"eval_runtime": 22.4059,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 308000
},
{
"epoch": 31.718332991172243,
"grad_norm": 0.26360154151916504,
"learning_rate": 0.00018319275869162725,
"loss": 0.0312,
"step": 309000
},
{
"epoch": 31.718332991172243,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.01562103908509016,
"eval_runtime": 22.1604,
"eval_samples_per_second": 4.061,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 309000
},
{
"epoch": 31.820981318004517,
"grad_norm": 0.15921452641487122,
"learning_rate": 0.00018216416375231435,
"loss": 0.0316,
"step": 310000
},
{
"epoch": 31.820981318004517,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.016504855826497078,
"eval_runtime": 22.5365,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 310000
},
{
"epoch": 31.92362964483679,
"grad_norm": 0.27201271057128906,
"learning_rate": 0.00018113556881300144,
"loss": 0.0314,
"step": 311000
},
{
"epoch": 31.92362964483679,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.015464858151972294,
"eval_runtime": 22.5378,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 311000
},
{
"epoch": 32.02627797166906,
"grad_norm": 0.3564852178096771,
"learning_rate": 0.00018010697387368853,
"loss": 0.0309,
"step": 312000
},
{
"epoch": 32.02627797166906,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.01600516401231289,
"eval_runtime": 22.3728,
"eval_samples_per_second": 4.023,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 312000
},
{
"epoch": 32.128926298501334,
"grad_norm": 0.26071467995643616,
"learning_rate": 0.00017907837893437565,
"loss": 0.0307,
"step": 313000
},
{
"epoch": 32.128926298501334,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.016347436234354973,
"eval_runtime": 22.155,
"eval_samples_per_second": 4.062,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 313000
},
{
"epoch": 32.23157462533361,
"grad_norm": 0.17180919647216797,
"learning_rate": 0.00017804978399506275,
"loss": 0.0307,
"step": 314000
},
{
"epoch": 32.23157462533361,
"eval_cer": 0.009182464454976303,
"eval_loss": 0.01613152027130127,
"eval_runtime": 22.4889,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 314000
},
{
"epoch": 32.33422295216588,
"grad_norm": 0.16418492794036865,
"learning_rate": 0.00017702118905574984,
"loss": 0.0301,
"step": 315000
},
{
"epoch": 32.33422295216588,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.015433421358466148,
"eval_runtime": 22.2866,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 315000
},
{
"epoch": 32.436871278998154,
"grad_norm": 0.2582835853099823,
"learning_rate": 0.00017599259411643696,
"loss": 0.0307,
"step": 316000
},
{
"epoch": 32.436871278998154,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.015836581587791443,
"eval_runtime": 22.5371,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 316000
},
{
"epoch": 32.53951960583043,
"grad_norm": 0.2574046552181244,
"learning_rate": 0.00017496399917712405,
"loss": 0.0308,
"step": 317000
},
{
"epoch": 32.53951960583043,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.015740592032670975,
"eval_runtime": 22.3104,
"eval_samples_per_second": 4.034,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 317000
},
{
"epoch": 32.642167932662694,
"grad_norm": 0.17120341956615448,
"learning_rate": 0.00017393540423781115,
"loss": 0.0304,
"step": 318000
},
{
"epoch": 32.642167932662694,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01630273461341858,
"eval_runtime": 22.6719,
"eval_samples_per_second": 3.97,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 318000
},
{
"epoch": 32.74481625949497,
"grad_norm": 0.27363935112953186,
"learning_rate": 0.00017290680929849824,
"loss": 0.0309,
"step": 319000
},
{
"epoch": 32.74481625949497,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.016513481736183167,
"eval_runtime": 22.5102,
"eval_samples_per_second": 3.998,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 319000
},
{
"epoch": 32.84746458632724,
"grad_norm": 0.29559651017189026,
"learning_rate": 0.00017187821435918536,
"loss": 0.0307,
"step": 320000
},
{
"epoch": 32.84746458632724,
"eval_cer": 0.009330568720379146,
"eval_loss": 0.01666964590549469,
"eval_runtime": 22.527,
"eval_samples_per_second": 3.995,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03932082216264522,
"step": 320000
},
{
"epoch": 32.950112913159515,
"grad_norm": 0.1644178181886673,
"learning_rate": 0.00017084961941987245,
"loss": 0.0305,
"step": 321000
},
{
"epoch": 32.950112913159515,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.01613970287144184,
"eval_runtime": 22.6019,
"eval_samples_per_second": 3.982,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 321000
},
{
"epoch": 33.05276123999179,
"grad_norm": 0.24239134788513184,
"learning_rate": 0.00016982102448055955,
"loss": 0.0301,
"step": 322000
},
{
"epoch": 33.05276123999179,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.015571474097669125,
"eval_runtime": 22.7263,
"eval_samples_per_second": 3.96,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 322000
},
{
"epoch": 33.15540956682406,
"grad_norm": 0.16490726172924042,
"learning_rate": 0.00016879242954124667,
"loss": 0.0296,
"step": 323000
},
{
"epoch": 33.15540956682406,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01544241514056921,
"eval_runtime": 22.655,
"eval_samples_per_second": 3.973,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 323000
},
{
"epoch": 33.258057893656336,
"grad_norm": 0.19249847531318665,
"learning_rate": 0.00016776383460193376,
"loss": 0.0299,
"step": 324000
},
{
"epoch": 33.258057893656336,
"eval_cer": 0.009478672985781991,
"eval_loss": 0.015552397817373276,
"eval_runtime": 22.3811,
"eval_samples_per_second": 4.021,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03932082216264522,
"step": 324000
},
{
"epoch": 33.36070622048861,
"grad_norm": 0.2540992200374603,
"learning_rate": 0.00016673523966262085,
"loss": 0.0298,
"step": 325000
},
{
"epoch": 33.36070622048861,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.01545222382992506,
"eval_runtime": 22.2235,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 325000
},
{
"epoch": 33.463354547320876,
"grad_norm": 0.17843221127986908,
"learning_rate": 0.00016570664472330795,
"loss": 0.0305,
"step": 326000
},
{
"epoch": 33.463354547320876,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.015175366774201393,
"eval_runtime": 22.1848,
"eval_samples_per_second": 4.057,
"eval_steps_per_second": 0.045,
"eval_wer": 0.032171581769437,
"step": 326000
},
{
"epoch": 33.56600287415315,
"grad_norm": 0.25464072823524475,
"learning_rate": 0.00016467804978399507,
"loss": 0.0306,
"step": 327000
},
{
"epoch": 33.56600287415315,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.014621883630752563,
"eval_runtime": 22.1184,
"eval_samples_per_second": 4.069,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 327000
},
{
"epoch": 33.66865120098542,
"grad_norm": 0.16736038029193878,
"learning_rate": 0.00016364945484468216,
"loss": 0.0308,
"step": 328000
},
{
"epoch": 33.66865120098542,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.01590174064040184,
"eval_runtime": 22.2326,
"eval_samples_per_second": 4.048,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 328000
},
{
"epoch": 33.771299527817696,
"grad_norm": 0.24227890372276306,
"learning_rate": 0.00016262085990536925,
"loss": 0.0304,
"step": 329000
},
{
"epoch": 33.771299527817696,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.015669850632548332,
"eval_runtime": 22.3742,
"eval_samples_per_second": 4.022,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 329000
},
{
"epoch": 33.87394785464997,
"grad_norm": 0.18546319007873535,
"learning_rate": 0.00016159226496605637,
"loss": 0.0306,
"step": 330000
},
{
"epoch": 33.87394785464997,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.015102799981832504,
"eval_runtime": 22.5007,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 330000
},
{
"epoch": 33.97659618148224,
"grad_norm": 0.23194563388824463,
"learning_rate": 0.00016056367002674347,
"loss": 0.0305,
"step": 331000
},
{
"epoch": 33.97659618148224,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.015225354582071304,
"eval_runtime": 22.6228,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03485254691689008,
"step": 331000
},
{
"epoch": 34.07924450831452,
"grad_norm": 0.18987098336219788,
"learning_rate": 0.00015953507508743056,
"loss": 0.0294,
"step": 332000
},
{
"epoch": 34.07924450831452,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.015497728250920773,
"eval_runtime": 22.4328,
"eval_samples_per_second": 4.012,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 332000
},
{
"epoch": 34.18189283514679,
"grad_norm": 0.16452568769454956,
"learning_rate": 0.00015850648014811765,
"loss": 0.0299,
"step": 333000
},
{
"epoch": 34.18189283514679,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01510694995522499,
"eval_runtime": 22.2639,
"eval_samples_per_second": 4.042,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 333000
},
{
"epoch": 34.28454116197906,
"grad_norm": 0.2669082283973694,
"learning_rate": 0.00015747788520880477,
"loss": 0.0297,
"step": 334000
},
{
"epoch": 34.28454116197906,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.015526418574154377,
"eval_runtime": 22.4447,
"eval_samples_per_second": 4.01,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 334000
},
{
"epoch": 34.38718948881133,
"grad_norm": 0.23023459315299988,
"learning_rate": 0.00015644929026949187,
"loss": 0.0298,
"step": 335000
},
{
"epoch": 34.38718948881133,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.01542733982205391,
"eval_runtime": 22.4734,
"eval_samples_per_second": 4.005,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03753351206434316,
"step": 335000
},
{
"epoch": 34.489837815643604,
"grad_norm": 0.3047637343406677,
"learning_rate": 0.00015542069533017896,
"loss": 0.0301,
"step": 336000
},
{
"epoch": 34.489837815643604,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.0148982098326087,
"eval_runtime": 22.2904,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03753351206434316,
"step": 336000
},
{
"epoch": 34.59248614247588,
"grad_norm": 0.2189682275056839,
"learning_rate": 0.00015439210039086608,
"loss": 0.0299,
"step": 337000
},
{
"epoch": 34.59248614247588,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.014870991930365562,
"eval_runtime": 22.4248,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 337000
},
{
"epoch": 34.69513446930815,
"grad_norm": 0.1642763465642929,
"learning_rate": 0.00015336350545155317,
"loss": 0.03,
"step": 338000
},
{
"epoch": 34.69513446930815,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.015146846882998943,
"eval_runtime": 22.5811,
"eval_samples_per_second": 3.986,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 338000
},
{
"epoch": 34.797782796140424,
"grad_norm": 0.21596594154834747,
"learning_rate": 0.00015233491051224027,
"loss": 0.03,
"step": 339000
},
{
"epoch": 34.797782796140424,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.014487739652395248,
"eval_runtime": 22.3193,
"eval_samples_per_second": 4.032,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 339000
},
{
"epoch": 34.9004311229727,
"grad_norm": 0.15714465081691742,
"learning_rate": 0.00015130631557292736,
"loss": 0.0305,
"step": 340000
},
{
"epoch": 34.9004311229727,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.014626013115048409,
"eval_runtime": 22.3971,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 340000
},
{
"epoch": 35.00307944980497,
"grad_norm": 0.2639608383178711,
"learning_rate": 0.00015027772063361448,
"loss": 0.03,
"step": 341000
},
{
"epoch": 35.00307944980497,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.014959813095629215,
"eval_runtime": 22.4235,
"eval_samples_per_second": 4.014,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 341000
},
{
"epoch": 35.10572777663724,
"grad_norm": 0.1461016684770584,
"learning_rate": 0.00014924912569430157,
"loss": 0.029,
"step": 342000
},
{
"epoch": 35.10572777663724,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.015357970260083675,
"eval_runtime": 22.4778,
"eval_samples_per_second": 4.004,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 342000
},
{
"epoch": 35.20837610346951,
"grad_norm": 0.21027566492557526,
"learning_rate": 0.00014822053075498867,
"loss": 0.0294,
"step": 343000
},
{
"epoch": 35.20837610346951,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.014925923198461533,
"eval_runtime": 22.563,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 343000
},
{
"epoch": 35.311024430301785,
"grad_norm": 0.26173582673072815,
"learning_rate": 0.0001471919358156758,
"loss": 0.0294,
"step": 344000
},
{
"epoch": 35.311024430301785,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.014718250371515751,
"eval_runtime": 22.4858,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 344000
},
{
"epoch": 35.41367275713406,
"grad_norm": 0.21235166490077972,
"learning_rate": 0.0001461633408763629,
"loss": 0.0295,
"step": 345000
},
{
"epoch": 35.41367275713406,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.014633492566645145,
"eval_runtime": 22.4549,
"eval_samples_per_second": 4.008,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 345000
},
{
"epoch": 35.51632108396633,
"grad_norm": 0.2826511263847351,
"learning_rate": 0.00014513474593705,
"loss": 0.0296,
"step": 346000
},
{
"epoch": 35.51632108396633,
"eval_cer": 0.00903436018957346,
"eval_loss": 0.015351605601608753,
"eval_runtime": 22.6095,
"eval_samples_per_second": 3.981,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 346000
},
{
"epoch": 35.618969410798606,
"grad_norm": 0.15846475958824158,
"learning_rate": 0.00014410615099773712,
"loss": 0.0298,
"step": 347000
},
{
"epoch": 35.618969410798606,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.014954261481761932,
"eval_runtime": 22.6016,
"eval_samples_per_second": 3.982,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 347000
},
{
"epoch": 35.72161773763088,
"grad_norm": 0.30824708938598633,
"learning_rate": 0.0001430775560584242,
"loss": 0.0298,
"step": 348000
},
{
"epoch": 35.72161773763088,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.015362138859927654,
"eval_runtime": 22.4427,
"eval_samples_per_second": 4.01,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 348000
},
{
"epoch": 35.824266064463146,
"grad_norm": 0.29468515515327454,
"learning_rate": 0.0001420489611191113,
"loss": 0.0297,
"step": 349000
},
{
"epoch": 35.824266064463146,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.014831768348813057,
"eval_runtime": 22.4167,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 349000
},
{
"epoch": 35.92691439129542,
"grad_norm": 0.3372247815132141,
"learning_rate": 0.0001410203661797984,
"loss": 0.0297,
"step": 350000
},
{
"epoch": 35.92691439129542,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.015637291595339775,
"eval_runtime": 22.6706,
"eval_samples_per_second": 3.97,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03485254691689008,
"step": 350000
},
{
"epoch": 36.02956271812769,
"grad_norm": 0.2713555693626404,
"learning_rate": 0.00013999177124048552,
"loss": 0.0295,
"step": 351000
},
{
"epoch": 36.02956271812769,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01471824198961258,
"eval_runtime": 22.5633,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 351000
},
{
"epoch": 36.132211044959966,
"grad_norm": 0.2100389152765274,
"learning_rate": 0.0001389631763011726,
"loss": 0.0288,
"step": 352000
},
{
"epoch": 36.132211044959966,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.014739991165697575,
"eval_runtime": 22.6462,
"eval_samples_per_second": 3.974,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 352000
},
{
"epoch": 36.23485937179224,
"grad_norm": 0.2058141529560089,
"learning_rate": 0.0001379345813618597,
"loss": 0.0294,
"step": 353000
},
{
"epoch": 36.23485937179224,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.014310094527900219,
"eval_runtime": 22.4136,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 353000
},
{
"epoch": 36.33750769862451,
"grad_norm": 0.21993263065814972,
"learning_rate": 0.00013690598642254683,
"loss": 0.0288,
"step": 354000
},
{
"epoch": 36.33750769862451,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.01488146185874939,
"eval_runtime": 22.7382,
"eval_samples_per_second": 3.958,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 354000
},
{
"epoch": 36.44015602545679,
"grad_norm": 0.44507354497909546,
"learning_rate": 0.00013587739148323392,
"loss": 0.0291,
"step": 355000
},
{
"epoch": 36.44015602545679,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.014914470724761486,
"eval_runtime": 22.4894,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 355000
},
{
"epoch": 36.54280435228906,
"grad_norm": 0.29088860750198364,
"learning_rate": 0.000134848796543921,
"loss": 0.0297,
"step": 356000
},
{
"epoch": 36.54280435228906,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.015177594497799873,
"eval_runtime": 22.3664,
"eval_samples_per_second": 4.024,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03663985701519214,
"step": 356000
},
{
"epoch": 36.64545267912133,
"grad_norm": 0.28655806183815,
"learning_rate": 0.0001338202016046081,
"loss": 0.0291,
"step": 357000
},
{
"epoch": 36.64545267912133,
"eval_cer": 0.008886255924170616,
"eval_loss": 0.014854129403829575,
"eval_runtime": 22.3549,
"eval_samples_per_second": 4.026,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 357000
},
{
"epoch": 36.7481010059536,
"grad_norm": 0.16526740789413452,
"learning_rate": 0.00013279160666529523,
"loss": 0.0293,
"step": 358000
},
{
"epoch": 36.7481010059536,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.015399602241814137,
"eval_runtime": 22.5162,
"eval_samples_per_second": 3.997,
"eval_steps_per_second": 0.044,
"eval_wer": 0.035746201966041107,
"step": 358000
},
{
"epoch": 36.850749332785874,
"grad_norm": 0.16871041059494019,
"learning_rate": 0.00013176301172598232,
"loss": 0.0296,
"step": 359000
},
{
"epoch": 36.850749332785874,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.015135127119719982,
"eval_runtime": 22.5959,
"eval_samples_per_second": 3.983,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 359000
},
{
"epoch": 36.95339765961815,
"grad_norm": 0.3259669542312622,
"learning_rate": 0.0001307344167866694,
"loss": 0.0296,
"step": 360000
},
{
"epoch": 36.95339765961815,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.015445960685610771,
"eval_runtime": 22.4067,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.032171581769437,
"step": 360000
},
{
"epoch": 37.05604598645042,
"grad_norm": 0.2573050260543823,
"learning_rate": 0.00012970582184735653,
"loss": 0.0288,
"step": 361000
},
{
"epoch": 37.05604598645042,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.015334886498749256,
"eval_runtime": 22.4559,
"eval_samples_per_second": 4.008,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 361000
},
{
"epoch": 37.158694313282695,
"grad_norm": 0.3997851610183716,
"learning_rate": 0.00012867722690804363,
"loss": 0.0285,
"step": 362000
},
{
"epoch": 37.158694313282695,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.015184923075139523,
"eval_runtime": 22.5665,
"eval_samples_per_second": 3.988,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03663985701519214,
"step": 362000
},
{
"epoch": 37.26134264011497,
"grad_norm": 0.2915215790271759,
"learning_rate": 0.00012764863196873072,
"loss": 0.0287,
"step": 363000
},
{
"epoch": 37.26134264011497,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.01511069294065237,
"eval_runtime": 22.7402,
"eval_samples_per_second": 3.958,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 363000
},
{
"epoch": 37.36399096694724,
"grad_norm": 0.1808217316865921,
"learning_rate": 0.0001266200370294178,
"loss": 0.0288,
"step": 364000
},
{
"epoch": 37.36399096694724,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.015011030249297619,
"eval_runtime": 22.3683,
"eval_samples_per_second": 4.024,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 364000
},
{
"epoch": 37.46663929377951,
"grad_norm": 0.2159794569015503,
"learning_rate": 0.00012559144209010493,
"loss": 0.0289,
"step": 365000
},
{
"epoch": 37.46663929377951,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.014206486754119396,
"eval_runtime": 22.2265,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.035746201966041107,
"step": 365000
},
{
"epoch": 37.56928762061178,
"grad_norm": 0.1461094170808792,
"learning_rate": 0.00012456284715079203,
"loss": 0.0291,
"step": 366000
},
{
"epoch": 37.56928762061178,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.01417731773108244,
"eval_runtime": 22.6758,
"eval_samples_per_second": 3.969,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 366000
},
{
"epoch": 37.671935947444055,
"grad_norm": 0.18427909910678864,
"learning_rate": 0.00012353425221147912,
"loss": 0.0292,
"step": 367000
},
{
"epoch": 37.671935947444055,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.014333653263747692,
"eval_runtime": 22.53,
"eval_samples_per_second": 3.995,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 367000
},
{
"epoch": 37.77458427427633,
"grad_norm": 0.17020884156227112,
"learning_rate": 0.00012250565727216624,
"loss": 0.0292,
"step": 368000
},
{
"epoch": 37.77458427427633,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.014181572012603283,
"eval_runtime": 22.4523,
"eval_samples_per_second": 4.008,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 368000
},
{
"epoch": 37.8772326011086,
"grad_norm": 0.15650001168251038,
"learning_rate": 0.00012147706233285333,
"loss": 0.0289,
"step": 369000
},
{
"epoch": 37.8772326011086,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.014552351087331772,
"eval_runtime": 22.33,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 369000
},
{
"epoch": 37.979880927940876,
"grad_norm": 0.25610971450805664,
"learning_rate": 0.00012044846739354043,
"loss": 0.0293,
"step": 370000
},
{
"epoch": 37.979880927940876,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.01422215811908245,
"eval_runtime": 22.9345,
"eval_samples_per_second": 3.924,
"eval_steps_per_second": 0.044,
"eval_wer": 0.029490616621983913,
"step": 370000
},
{
"epoch": 38.08252925477315,
"grad_norm": 0.25401365756988525,
"learning_rate": 0.00011941987245422753,
"loss": 0.0286,
"step": 371000
},
{
"epoch": 38.08252925477315,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.014245335012674332,
"eval_runtime": 22.3755,
"eval_samples_per_second": 4.022,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 371000
},
{
"epoch": 38.18517758160542,
"grad_norm": 0.28409621119499207,
"learning_rate": 0.00011839127751491462,
"loss": 0.0281,
"step": 372000
},
{
"epoch": 38.18517758160542,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.014165487140417099,
"eval_runtime": 22.2381,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 372000
},
{
"epoch": 38.28782590843769,
"grad_norm": 0.20209653675556183,
"learning_rate": 0.00011736268257560173,
"loss": 0.0285,
"step": 373000
},
{
"epoch": 38.28782590843769,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.014026058837771416,
"eval_runtime": 22.4373,
"eval_samples_per_second": 4.011,
"eval_steps_per_second": 0.045,
"eval_wer": 0.029490616621983913,
"step": 373000
},
{
"epoch": 38.39047423526996,
"grad_norm": 0.2063857465982437,
"learning_rate": 0.00011633408763628884,
"loss": 0.029,
"step": 374000
},
{
"epoch": 38.39047423526996,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.014216107316315174,
"eval_runtime": 22.5613,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 374000
},
{
"epoch": 38.493122562102236,
"grad_norm": 0.1818021684885025,
"learning_rate": 0.00011530549269697593,
"loss": 0.0285,
"step": 375000
},
{
"epoch": 38.493122562102236,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.014219836331903934,
"eval_runtime": 22.5037,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 375000
},
{
"epoch": 38.59577088893451,
"grad_norm": 0.18460506200790405,
"learning_rate": 0.00011427689775766304,
"loss": 0.0285,
"step": 376000
},
{
"epoch": 38.59577088893451,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.013656863942742348,
"eval_runtime": 22.652,
"eval_samples_per_second": 3.973,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 376000
},
{
"epoch": 38.69841921576678,
"grad_norm": 0.227520152926445,
"learning_rate": 0.00011324830281835013,
"loss": 0.0288,
"step": 377000
},
{
"epoch": 38.69841921576678,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.01367582194507122,
"eval_runtime": 22.645,
"eval_samples_per_second": 3.974,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 377000
},
{
"epoch": 38.80106754259906,
"grad_norm": 0.1951012909412384,
"learning_rate": 0.00011221970787903724,
"loss": 0.0287,
"step": 378000
},
{
"epoch": 38.80106754259906,
"eval_cer": 0.008738151658767773,
"eval_loss": 0.014322535134851933,
"eval_runtime": 22.4631,
"eval_samples_per_second": 4.007,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 378000
},
{
"epoch": 38.90371586943133,
"grad_norm": 0.2870897352695465,
"learning_rate": 0.00011119111293972433,
"loss": 0.0288,
"step": 379000
},
{
"epoch": 38.90371586943133,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.01387989055365324,
"eval_runtime": 22.1509,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 379000
},
{
"epoch": 39.006364196263604,
"grad_norm": 0.1880166083574295,
"learning_rate": 0.00011016251800041144,
"loss": 0.0285,
"step": 380000
},
{
"epoch": 39.006364196263604,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.014345232397317886,
"eval_runtime": 22.2937,
"eval_samples_per_second": 4.037,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 380000
},
{
"epoch": 39.10901252309587,
"grad_norm": 0.30459660291671753,
"learning_rate": 0.00010913392306109854,
"loss": 0.0279,
"step": 381000
},
{
"epoch": 39.10901252309587,
"eval_cer": 0.008590047393364929,
"eval_loss": 0.013818979263305664,
"eval_runtime": 22.1693,
"eval_samples_per_second": 4.06,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03485254691689008,
"step": 381000
},
{
"epoch": 39.211660849928144,
"grad_norm": 0.2618006765842438,
"learning_rate": 0.00010810532812178564,
"loss": 0.0281,
"step": 382000
},
{
"epoch": 39.211660849928144,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.013895167037844658,
"eval_runtime": 22.6129,
"eval_samples_per_second": 3.98,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 382000
},
{
"epoch": 39.31430917676042,
"grad_norm": 0.1827547401189804,
"learning_rate": 0.00010707673318247274,
"loss": 0.0281,
"step": 383000
},
{
"epoch": 39.31430917676042,
"eval_cer": 0.006960900473933649,
"eval_loss": 0.013730869628489017,
"eval_runtime": 22.5617,
"eval_samples_per_second": 3.989,
"eval_steps_per_second": 0.044,
"eval_wer": 0.028596961572832886,
"step": 383000
},
{
"epoch": 39.41695750359269,
"grad_norm": 0.20615758001804352,
"learning_rate": 0.00010604813824315984,
"loss": 0.0283,
"step": 384000
},
{
"epoch": 39.41695750359269,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.014077040366828442,
"eval_runtime": 22.4073,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 384000
},
{
"epoch": 39.519605830424965,
"grad_norm": 0.28781017661094666,
"learning_rate": 0.00010501954330384694,
"loss": 0.0284,
"step": 385000
},
{
"epoch": 39.519605830424965,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013616991229355335,
"eval_runtime": 22.4299,
"eval_samples_per_second": 4.013,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 385000
},
{
"epoch": 39.62225415725724,
"grad_norm": 0.1402323842048645,
"learning_rate": 0.00010399094836453404,
"loss": 0.0287,
"step": 386000
},
{
"epoch": 39.62225415725724,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.013439136557281017,
"eval_runtime": 22.6508,
"eval_samples_per_second": 3.973,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 386000
},
{
"epoch": 39.72490248408951,
"grad_norm": 0.22864773869514465,
"learning_rate": 0.00010296235342522114,
"loss": 0.0286,
"step": 387000
},
{
"epoch": 39.72490248408951,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.01389585342258215,
"eval_runtime": 22.2256,
"eval_samples_per_second": 4.049,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 387000
},
{
"epoch": 39.827550810921785,
"grad_norm": 0.20083709061145782,
"learning_rate": 0.00010193375848590825,
"loss": 0.0287,
"step": 388000
},
{
"epoch": 39.827550810921785,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.014366346411406994,
"eval_runtime": 22.4657,
"eval_samples_per_second": 4.006,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 388000
},
{
"epoch": 39.93019913775405,
"grad_norm": 0.26983821392059326,
"learning_rate": 0.00010090516354659534,
"loss": 0.0285,
"step": 389000
},
{
"epoch": 39.93019913775405,
"eval_cer": 0.008145734597156399,
"eval_loss": 0.014381532557308674,
"eval_runtime": 22.541,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 389000
},
{
"epoch": 40.032847464586325,
"grad_norm": 0.361680269241333,
"learning_rate": 9.987656860728245e-05,
"loss": 0.0278,
"step": 390000
},
{
"epoch": 40.032847464586325,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.01420989166945219,
"eval_runtime": 22.5652,
"eval_samples_per_second": 3.988,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 390000
},
{
"epoch": 40.1354957914186,
"grad_norm": 0.17232652008533478,
"learning_rate": 9.884797366796956e-05,
"loss": 0.0276,
"step": 391000
},
{
"epoch": 40.1354957914186,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.014762550592422485,
"eval_runtime": 22.4968,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 391000
},
{
"epoch": 40.23814411825087,
"grad_norm": 0.35826170444488525,
"learning_rate": 9.781937872865666e-05,
"loss": 0.0278,
"step": 392000
},
{
"epoch": 40.23814411825087,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.0146293630823493,
"eval_runtime": 22.6001,
"eval_samples_per_second": 3.982,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 392000
},
{
"epoch": 40.340792445083146,
"grad_norm": 0.41236042976379395,
"learning_rate": 9.679078378934376e-05,
"loss": 0.0279,
"step": 393000
},
{
"epoch": 40.340792445083146,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.014173084869980812,
"eval_runtime": 22.3588,
"eval_samples_per_second": 4.025,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 393000
},
{
"epoch": 40.44344077191542,
"grad_norm": 0.13260366022586823,
"learning_rate": 9.576218885003086e-05,
"loss": 0.0281,
"step": 394000
},
{
"epoch": 40.44344077191542,
"eval_cer": 0.006812796208530806,
"eval_loss": 0.013978157192468643,
"eval_runtime": 22.7638,
"eval_samples_per_second": 3.954,
"eval_steps_per_second": 0.044,
"eval_wer": 0.028596961572832886,
"step": 394000
},
{
"epoch": 40.54608909874769,
"grad_norm": 0.1822642832994461,
"learning_rate": 9.473359391071797e-05,
"loss": 0.0282,
"step": 395000
},
{
"epoch": 40.54608909874769,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.014006286859512329,
"eval_runtime": 23.5638,
"eval_samples_per_second": 3.819,
"eval_steps_per_second": 0.042,
"eval_wer": 0.03306523681858802,
"step": 395000
},
{
"epoch": 40.64873742557997,
"grad_norm": 0.2515232563018799,
"learning_rate": 9.370499897140506e-05,
"loss": 0.0282,
"step": 396000
},
{
"epoch": 40.64873742557997,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013893728144466877,
"eval_runtime": 23.3851,
"eval_samples_per_second": 3.849,
"eval_steps_per_second": 0.043,
"eval_wer": 0.029490616621983913,
"step": 396000
},
{
"epoch": 40.75138575241223,
"grad_norm": 0.24263253808021545,
"learning_rate": 9.267640403209217e-05,
"loss": 0.0282,
"step": 397000
},
{
"epoch": 40.75138575241223,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.013497700914740562,
"eval_runtime": 22.831,
"eval_samples_per_second": 3.942,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 397000
},
{
"epoch": 40.85403407924451,
"grad_norm": 0.3449910581111908,
"learning_rate": 9.164780909277926e-05,
"loss": 0.0281,
"step": 398000
},
{
"epoch": 40.85403407924451,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.014030919410288334,
"eval_runtime": 22.9536,
"eval_samples_per_second": 3.921,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 398000
},
{
"epoch": 40.95668240607678,
"grad_norm": 0.34806039929389954,
"learning_rate": 9.061921415346637e-05,
"loss": 0.0283,
"step": 399000
},
{
"epoch": 40.95668240607678,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013621107675135136,
"eval_runtime": 22.7573,
"eval_samples_per_second": 3.955,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 399000
},
{
"epoch": 41.05933073290905,
"grad_norm": 0.18996162712574005,
"learning_rate": 8.959061921415348e-05,
"loss": 0.0275,
"step": 400000
},
{
"epoch": 41.05933073290905,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.013989981263875961,
"eval_runtime": 22.345,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 400000
},
{
"epoch": 41.16197905974133,
"grad_norm": 0.22895024716854095,
"learning_rate": 8.856202427484057e-05,
"loss": 0.0278,
"step": 401000
},
{
"epoch": 41.16197905974133,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.013726425357162952,
"eval_runtime": 22.1918,
"eval_samples_per_second": 4.056,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 401000
},
{
"epoch": 41.2646273865736,
"grad_norm": 0.20046815276145935,
"learning_rate": 8.753342933552768e-05,
"loss": 0.0276,
"step": 402000
},
{
"epoch": 41.2646273865736,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.014126642607152462,
"eval_runtime": 22.5351,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 402000
},
{
"epoch": 41.367275713405874,
"grad_norm": 0.2250887155532837,
"learning_rate": 8.650483439621477e-05,
"loss": 0.0277,
"step": 403000
},
{
"epoch": 41.367275713405874,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.013941235840320587,
"eval_runtime": 22.4208,
"eval_samples_per_second": 4.014,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 403000
},
{
"epoch": 41.46992404023814,
"grad_norm": 0.20660826563835144,
"learning_rate": 8.547623945690188e-05,
"loss": 0.0278,
"step": 404000
},
{
"epoch": 41.46992404023814,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013417248614132404,
"eval_runtime": 22.349,
"eval_samples_per_second": 4.027,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 404000
},
{
"epoch": 41.572572367070414,
"grad_norm": 0.20120590925216675,
"learning_rate": 8.444764451758897e-05,
"loss": 0.0278,
"step": 405000
},
{
"epoch": 41.572572367070414,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.01310759037733078,
"eval_runtime": 22.3447,
"eval_samples_per_second": 4.028,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 405000
},
{
"epoch": 41.67522069390269,
"grad_norm": 0.2711530327796936,
"learning_rate": 8.341904957827608e-05,
"loss": 0.0275,
"step": 406000
},
{
"epoch": 41.67522069390269,
"eval_cer": 0.006664691943127962,
"eval_loss": 0.012631074525415897,
"eval_runtime": 22.6139,
"eval_samples_per_second": 3.98,
"eval_steps_per_second": 0.044,
"eval_wer": 0.028596961572832886,
"step": 406000
},
{
"epoch": 41.77786902073496,
"grad_norm": 0.16983696818351746,
"learning_rate": 8.239045463896318e-05,
"loss": 0.028,
"step": 407000
},
{
"epoch": 41.77786902073496,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012849998660385609,
"eval_runtime": 22.4766,
"eval_samples_per_second": 4.004,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 407000
},
{
"epoch": 41.880517347567235,
"grad_norm": 0.1700281947851181,
"learning_rate": 8.136185969965028e-05,
"loss": 0.028,
"step": 408000
},
{
"epoch": 41.880517347567235,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012665662914514542,
"eval_runtime": 22.4018,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 408000
},
{
"epoch": 41.98316567439951,
"grad_norm": 0.23489758372306824,
"learning_rate": 8.033326476033738e-05,
"loss": 0.0278,
"step": 409000
},
{
"epoch": 41.98316567439951,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.013263718225061893,
"eval_runtime": 22.3882,
"eval_samples_per_second": 4.02,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 409000
},
{
"epoch": 42.08581400123178,
"grad_norm": 0.2607017159461975,
"learning_rate": 7.930466982102448e-05,
"loss": 0.0276,
"step": 410000
},
{
"epoch": 42.08581400123178,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013156604953110218,
"eval_runtime": 22.6758,
"eval_samples_per_second": 3.969,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 410000
},
{
"epoch": 42.188462328064055,
"grad_norm": 0.20945683121681213,
"learning_rate": 7.827607488171158e-05,
"loss": 0.0272,
"step": 411000
},
{
"epoch": 42.188462328064055,
"eval_cer": 0.006664691943127962,
"eval_loss": 0.012946737930178642,
"eval_runtime": 22.4875,
"eval_samples_per_second": 4.002,
"eval_steps_per_second": 0.044,
"eval_wer": 0.028596961572832886,
"step": 411000
},
{
"epoch": 42.29111065489632,
"grad_norm": 0.25923091173171997,
"learning_rate": 7.724747994239868e-05,
"loss": 0.0274,
"step": 412000
},
{
"epoch": 42.29111065489632,
"eval_cer": 0.006960900473933649,
"eval_loss": 0.012892471626400948,
"eval_runtime": 22.5377,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.029490616621983913,
"step": 412000
},
{
"epoch": 42.393758981728595,
"grad_norm": 0.2663125693798065,
"learning_rate": 7.621888500308578e-05,
"loss": 0.0273,
"step": 413000
},
{
"epoch": 42.393758981728595,
"eval_cer": 0.006664691943127962,
"eval_loss": 0.012838827446103096,
"eval_runtime": 22.3627,
"eval_samples_per_second": 4.025,
"eval_steps_per_second": 0.045,
"eval_wer": 0.029490616621983913,
"step": 413000
},
{
"epoch": 42.49640730856087,
"grad_norm": 0.1551075577735901,
"learning_rate": 7.519029006377289e-05,
"loss": 0.0275,
"step": 414000
},
{
"epoch": 42.49640730856087,
"eval_cer": 0.006664691943127962,
"eval_loss": 0.01299965288490057,
"eval_runtime": 22.7331,
"eval_samples_per_second": 3.959,
"eval_steps_per_second": 0.044,
"eval_wer": 0.029490616621983913,
"step": 414000
},
{
"epoch": 42.59905563539314,
"grad_norm": 0.16298335790634155,
"learning_rate": 7.416169512445998e-05,
"loss": 0.0275,
"step": 415000
},
{
"epoch": 42.59905563539314,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013336721807718277,
"eval_runtime": 22.767,
"eval_samples_per_second": 3.953,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 415000
},
{
"epoch": 42.701703962225416,
"grad_norm": 0.20813694596290588,
"learning_rate": 7.313310018514709e-05,
"loss": 0.0278,
"step": 416000
},
{
"epoch": 42.701703962225416,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.0134715735912323,
"eval_runtime": 22.6511,
"eval_samples_per_second": 3.973,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 416000
},
{
"epoch": 42.80435228905769,
"grad_norm": 0.16702768206596375,
"learning_rate": 7.210450524583418e-05,
"loss": 0.0279,
"step": 417000
},
{
"epoch": 42.80435228905769,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.013460041023790836,
"eval_runtime": 22.4175,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03306523681858802,
"step": 417000
},
{
"epoch": 42.90700061588996,
"grad_norm": 0.2635751962661743,
"learning_rate": 7.107591030652129e-05,
"loss": 0.0273,
"step": 418000
},
{
"epoch": 42.90700061588996,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.013223396614193916,
"eval_runtime": 22.8666,
"eval_samples_per_second": 3.936,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 418000
},
{
"epoch": 43.00964894272224,
"grad_norm": 0.1874234676361084,
"learning_rate": 7.004731536720838e-05,
"loss": 0.0276,
"step": 419000
},
{
"epoch": 43.00964894272224,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013210386037826538,
"eval_runtime": 22.5237,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 419000
},
{
"epoch": 43.1122972695545,
"grad_norm": 0.16425500810146332,
"learning_rate": 6.901872042789549e-05,
"loss": 0.0269,
"step": 420000
},
{
"epoch": 43.1122972695545,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013268969021737576,
"eval_runtime": 22.5906,
"eval_samples_per_second": 3.984,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 420000
},
{
"epoch": 43.21494559638678,
"grad_norm": 0.14309245347976685,
"learning_rate": 6.79901254885826e-05,
"loss": 0.0273,
"step": 421000
},
{
"epoch": 43.21494559638678,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.012923350557684898,
"eval_runtime": 22.6017,
"eval_samples_per_second": 3.982,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 421000
},
{
"epoch": 43.31759392321905,
"grad_norm": 0.3173372447490692,
"learning_rate": 6.69615305492697e-05,
"loss": 0.0271,
"step": 422000
},
{
"epoch": 43.31759392321905,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.013023743405938148,
"eval_runtime": 22.4573,
"eval_samples_per_second": 4.008,
"eval_steps_per_second": 0.045,
"eval_wer": 0.032171581769437,
"step": 422000
},
{
"epoch": 43.420242250051324,
"grad_norm": 0.2641269266605377,
"learning_rate": 6.593293560995681e-05,
"loss": 0.0274,
"step": 423000
},
{
"epoch": 43.420242250051324,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.013114223256707191,
"eval_runtime": 22.3329,
"eval_samples_per_second": 4.03,
"eval_steps_per_second": 0.045,
"eval_wer": 0.032171581769437,
"step": 423000
},
{
"epoch": 43.5228905768836,
"grad_norm": 0.20256099104881287,
"learning_rate": 6.49043406706439e-05,
"loss": 0.0273,
"step": 424000
},
{
"epoch": 43.5228905768836,
"eval_cer": 0.008441943127962086,
"eval_loss": 0.013389894738793373,
"eval_runtime": 22.4353,
"eval_samples_per_second": 4.012,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 424000
},
{
"epoch": 43.62553890371587,
"grad_norm": 0.2633448541164398,
"learning_rate": 6.387574573133101e-05,
"loss": 0.0273,
"step": 425000
},
{
"epoch": 43.62553890371587,
"eval_cer": 0.008293838862559242,
"eval_loss": 0.013175377622246742,
"eval_runtime": 22.0476,
"eval_samples_per_second": 4.082,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 425000
},
{
"epoch": 43.728187230548144,
"grad_norm": 0.14357689023017883,
"learning_rate": 6.28471507920181e-05,
"loss": 0.0271,
"step": 426000
},
{
"epoch": 43.728187230548144,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013218458741903305,
"eval_runtime": 22.1794,
"eval_samples_per_second": 4.058,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 426000
},
{
"epoch": 43.83083555738042,
"grad_norm": 0.20153464376926422,
"learning_rate": 6.181855585270521e-05,
"loss": 0.0275,
"step": 427000
},
{
"epoch": 43.83083555738042,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013227393850684166,
"eval_runtime": 22.262,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 427000
},
{
"epoch": 43.933483884212684,
"grad_norm": 0.1699199080467224,
"learning_rate": 6.07899609133923e-05,
"loss": 0.0272,
"step": 428000
},
{
"epoch": 43.933483884212684,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013098928146064281,
"eval_runtime": 22.2399,
"eval_samples_per_second": 4.047,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 428000
},
{
"epoch": 44.03613221104496,
"grad_norm": 0.21964909136295319,
"learning_rate": 5.97613659740794e-05,
"loss": 0.0268,
"step": 429000
},
{
"epoch": 44.03613221104496,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013152836821973324,
"eval_runtime": 22.2431,
"eval_samples_per_second": 4.046,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 429000
},
{
"epoch": 44.13878053787723,
"grad_norm": 0.2735491693019867,
"learning_rate": 5.873277103476652e-05,
"loss": 0.0265,
"step": 430000
},
{
"epoch": 44.13878053787723,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013426227495074272,
"eval_runtime": 22.6558,
"eval_samples_per_second": 3.973,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 430000
},
{
"epoch": 44.241428864709505,
"grad_norm": 0.18371999263763428,
"learning_rate": 5.7704176095453617e-05,
"loss": 0.0269,
"step": 431000
},
{
"epoch": 44.241428864709505,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013290656730532646,
"eval_runtime": 22.5067,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 431000
},
{
"epoch": 44.34407719154178,
"grad_norm": 0.3648182451725006,
"learning_rate": 5.6675581156140717e-05,
"loss": 0.0269,
"step": 432000
},
{
"epoch": 44.34407719154178,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.013338846154510975,
"eval_runtime": 22.5224,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 432000
},
{
"epoch": 44.44672551837405,
"grad_norm": 0.14803066849708557,
"learning_rate": 5.5646986216827816e-05,
"loss": 0.0272,
"step": 433000
},
{
"epoch": 44.44672551837405,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.013501363806426525,
"eval_runtime": 22.4789,
"eval_samples_per_second": 4.004,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 433000
},
{
"epoch": 44.549373845206325,
"grad_norm": 0.13751809298992157,
"learning_rate": 5.4618391277514916e-05,
"loss": 0.0275,
"step": 434000
},
{
"epoch": 44.549373845206325,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.013389664702117443,
"eval_runtime": 22.5113,
"eval_samples_per_second": 3.998,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 434000
},
{
"epoch": 44.6520221720386,
"grad_norm": 0.18634863197803497,
"learning_rate": 5.3589796338202016e-05,
"loss": 0.0271,
"step": 435000
},
{
"epoch": 44.6520221720386,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.013616056181490421,
"eval_runtime": 22.6218,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 435000
},
{
"epoch": 44.754670498870865,
"grad_norm": 0.20694060623645782,
"learning_rate": 5.2561201398889116e-05,
"loss": 0.0272,
"step": 436000
},
{
"epoch": 44.754670498870865,
"eval_cer": 0.007997630331753554,
"eval_loss": 0.013776379637420177,
"eval_runtime": 22.7323,
"eval_samples_per_second": 3.959,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 436000
},
{
"epoch": 44.85731882570314,
"grad_norm": 0.22977876663208008,
"learning_rate": 5.153260645957622e-05,
"loss": 0.0269,
"step": 437000
},
{
"epoch": 44.85731882570314,
"eval_cer": 0.006664691943127962,
"eval_loss": 0.01339475717395544,
"eval_runtime": 22.3891,
"eval_samples_per_second": 4.02,
"eval_steps_per_second": 0.045,
"eval_wer": 0.028596961572832886,
"step": 437000
},
{
"epoch": 44.95996715253541,
"grad_norm": 0.16584943234920502,
"learning_rate": 5.050401152026332e-05,
"loss": 0.0269,
"step": 438000
},
{
"epoch": 44.95996715253541,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.013209763914346695,
"eval_runtime": 22.5389,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 438000
},
{
"epoch": 45.062615479367686,
"grad_norm": 0.2402484118938446,
"learning_rate": 4.947541658095042e-05,
"loss": 0.0268,
"step": 439000
},
{
"epoch": 45.062615479367686,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013325601816177368,
"eval_runtime": 22.5418,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 439000
},
{
"epoch": 45.16526380619996,
"grad_norm": 0.18944856524467468,
"learning_rate": 4.844682164163752e-05,
"loss": 0.0265,
"step": 440000
},
{
"epoch": 45.16526380619996,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012947525829076767,
"eval_runtime": 22.3674,
"eval_samples_per_second": 4.024,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03127792672028597,
"step": 440000
},
{
"epoch": 45.26791213303223,
"grad_norm": 0.18352921307086945,
"learning_rate": 4.741822670232462e-05,
"loss": 0.0265,
"step": 441000
},
{
"epoch": 45.26791213303223,
"eval_cer": 0.006960900473933649,
"eval_loss": 0.012937680818140507,
"eval_runtime": 22.3946,
"eval_samples_per_second": 4.019,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 441000
},
{
"epoch": 45.37056045986451,
"grad_norm": 0.2022509127855301,
"learning_rate": 4.638963176301172e-05,
"loss": 0.0266,
"step": 442000
},
{
"epoch": 45.37056045986451,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012964108027517796,
"eval_runtime": 22.5768,
"eval_samples_per_second": 3.986,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 442000
},
{
"epoch": 45.47320878669678,
"grad_norm": 0.3211962878704071,
"learning_rate": 4.536103682369882e-05,
"loss": 0.0269,
"step": 443000
},
{
"epoch": 45.47320878669678,
"eval_cer": 0.006960900473933649,
"eval_loss": 0.01303518284112215,
"eval_runtime": 22.4387,
"eval_samples_per_second": 4.011,
"eval_steps_per_second": 0.045,
"eval_wer": 0.029490616621983913,
"step": 443000
},
{
"epoch": 45.57585711352905,
"grad_norm": 0.1719512939453125,
"learning_rate": 4.433244188438593e-05,
"loss": 0.0268,
"step": 444000
},
{
"epoch": 45.57585711352905,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012754334136843681,
"eval_runtime": 22.842,
"eval_samples_per_second": 3.94,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 444000
},
{
"epoch": 45.67850544036132,
"grad_norm": 0.1725946068763733,
"learning_rate": 4.330384694507303e-05,
"loss": 0.0265,
"step": 445000
},
{
"epoch": 45.67850544036132,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.012887077406048775,
"eval_runtime": 22.7726,
"eval_samples_per_second": 3.952,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 445000
},
{
"epoch": 45.781153767193594,
"grad_norm": 0.17726309597492218,
"learning_rate": 4.2275252005760136e-05,
"loss": 0.0269,
"step": 446000
},
{
"epoch": 45.781153767193594,
"eval_cer": 0.006812796208530806,
"eval_loss": 0.012967323884367943,
"eval_runtime": 22.8533,
"eval_samples_per_second": 3.938,
"eval_steps_per_second": 0.044,
"eval_wer": 0.029490616621983913,
"step": 446000
},
{
"epoch": 45.88380209402587,
"grad_norm": 0.2569744288921356,
"learning_rate": 4.1246657066447236e-05,
"loss": 0.0268,
"step": 447000
},
{
"epoch": 45.88380209402587,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.013006458058953285,
"eval_runtime": 22.479,
"eval_samples_per_second": 4.004,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 447000
},
{
"epoch": 45.98645042085814,
"grad_norm": 0.18339526653289795,
"learning_rate": 4.0218062127134336e-05,
"loss": 0.0272,
"step": 448000
},
{
"epoch": 45.98645042085814,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012899891473352909,
"eval_runtime": 22.2876,
"eval_samples_per_second": 4.038,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 448000
},
{
"epoch": 46.089098747690414,
"grad_norm": 0.21979086101055145,
"learning_rate": 3.9189467187821436e-05,
"loss": 0.0266,
"step": 449000
},
{
"epoch": 46.089098747690414,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.012851923704147339,
"eval_runtime": 22.1005,
"eval_samples_per_second": 4.072,
"eval_steps_per_second": 0.045,
"eval_wer": 0.032171581769437,
"step": 449000
},
{
"epoch": 46.19174707452269,
"grad_norm": 0.18747855722904205,
"learning_rate": 3.816087224850854e-05,
"loss": 0.0263,
"step": 450000
},
{
"epoch": 46.19174707452269,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.012800745666027069,
"eval_runtime": 22.2631,
"eval_samples_per_second": 4.043,
"eval_steps_per_second": 0.045,
"eval_wer": 0.032171581769437,
"step": 450000
},
{
"epoch": 46.29439540135496,
"grad_norm": 0.20750053226947784,
"learning_rate": 3.713227730919564e-05,
"loss": 0.0263,
"step": 451000
},
{
"epoch": 46.29439540135496,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012725234031677246,
"eval_runtime": 22.496,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 451000
},
{
"epoch": 46.39704372818723,
"grad_norm": 0.19758926331996918,
"learning_rate": 3.610368236988274e-05,
"loss": 0.027,
"step": 452000
},
{
"epoch": 46.39704372818723,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012631443329155445,
"eval_runtime": 22.4978,
"eval_samples_per_second": 4.0,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 452000
},
{
"epoch": 46.4996920550195,
"grad_norm": 0.23830477893352509,
"learning_rate": 3.507508743056984e-05,
"loss": 0.0267,
"step": 453000
},
{
"epoch": 46.4996920550195,
"eval_cer": 0.007701421800947867,
"eval_loss": 0.012887844815850258,
"eval_runtime": 22.6407,
"eval_samples_per_second": 3.975,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 453000
},
{
"epoch": 46.602340381851775,
"grad_norm": 0.22783514857292175,
"learning_rate": 3.404649249125694e-05,
"loss": 0.026,
"step": 454000
},
{
"epoch": 46.602340381851775,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.012926483526825905,
"eval_runtime": 22.7995,
"eval_samples_per_second": 3.947,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 454000
},
{
"epoch": 46.70498870868405,
"grad_norm": 0.17195259034633636,
"learning_rate": 3.301789755194404e-05,
"loss": 0.0269,
"step": 455000
},
{
"epoch": 46.70498870868405,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.012897716835141182,
"eval_runtime": 22.735,
"eval_samples_per_second": 3.959,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 455000
},
{
"epoch": 46.80763703551632,
"grad_norm": 0.21375824511051178,
"learning_rate": 3.198930261263114e-05,
"loss": 0.0267,
"step": 456000
},
{
"epoch": 46.80763703551632,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.01305030845105648,
"eval_runtime": 22.6049,
"eval_samples_per_second": 3.981,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03395889186773905,
"step": 456000
},
{
"epoch": 46.910285362348596,
"grad_norm": 0.2291615605354309,
"learning_rate": 3.096070767331825e-05,
"loss": 0.0269,
"step": 457000
},
{
"epoch": 46.910285362348596,
"eval_cer": 0.007553317535545024,
"eval_loss": 0.013074580579996109,
"eval_runtime": 22.4586,
"eval_samples_per_second": 4.007,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03395889186773905,
"step": 457000
},
{
"epoch": 47.01293368918087,
"grad_norm": 0.18737538158893585,
"learning_rate": 2.993211273400535e-05,
"loss": 0.0266,
"step": 458000
},
{
"epoch": 47.01293368918087,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.01272270642220974,
"eval_runtime": 22.7233,
"eval_samples_per_second": 3.961,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 458000
},
{
"epoch": 47.115582016013136,
"grad_norm": 0.20762291550636292,
"learning_rate": 2.8903517794692452e-05,
"loss": 0.0263,
"step": 459000
},
{
"epoch": 47.115582016013136,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.01276144664734602,
"eval_runtime": 22.6501,
"eval_samples_per_second": 3.973,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 459000
},
{
"epoch": 47.21823034284541,
"grad_norm": 0.22940364480018616,
"learning_rate": 2.7874922855379552e-05,
"loss": 0.0263,
"step": 460000
},
{
"epoch": 47.21823034284541,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012876234017312527,
"eval_runtime": 22.497,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 460000
},
{
"epoch": 47.32087866967768,
"grad_norm": 0.1963769495487213,
"learning_rate": 2.6846327916066652e-05,
"loss": 0.0262,
"step": 461000
},
{
"epoch": 47.32087866967768,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012759842909872532,
"eval_runtime": 22.4933,
"eval_samples_per_second": 4.001,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 461000
},
{
"epoch": 47.423526996509956,
"grad_norm": 0.13898225128650665,
"learning_rate": 2.5817732976753755e-05,
"loss": 0.0262,
"step": 462000
},
{
"epoch": 47.423526996509956,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012645237147808075,
"eval_runtime": 22.7141,
"eval_samples_per_second": 3.962,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 462000
},
{
"epoch": 47.52617532334223,
"grad_norm": 0.295411616563797,
"learning_rate": 2.4789138037440855e-05,
"loss": 0.0264,
"step": 463000
},
{
"epoch": 47.52617532334223,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012794570997357368,
"eval_runtime": 22.5479,
"eval_samples_per_second": 3.992,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 463000
},
{
"epoch": 47.6288236501745,
"grad_norm": 0.2703556418418884,
"learning_rate": 2.3760543098127955e-05,
"loss": 0.0264,
"step": 464000
},
{
"epoch": 47.6288236501745,
"eval_cer": 0.00784952606635071,
"eval_loss": 0.012676162645220757,
"eval_runtime": 22.746,
"eval_samples_per_second": 3.957,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 464000
},
{
"epoch": 47.73147197700678,
"grad_norm": 0.20613588392734528,
"learning_rate": 2.2731948158815062e-05,
"loss": 0.0267,
"step": 465000
},
{
"epoch": 47.73147197700678,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012650624848902225,
"eval_runtime": 22.6051,
"eval_samples_per_second": 3.981,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 465000
},
{
"epoch": 47.83412030383905,
"grad_norm": 0.19190002977848053,
"learning_rate": 2.1703353219502162e-05,
"loss": 0.0261,
"step": 466000
},
{
"epoch": 47.83412030383905,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012826332822442055,
"eval_runtime": 22.6709,
"eval_samples_per_second": 3.97,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03306523681858802,
"step": 466000
},
{
"epoch": 47.93676863067132,
"grad_norm": 0.1679309457540512,
"learning_rate": 2.0674758280189262e-05,
"loss": 0.0262,
"step": 467000
},
{
"epoch": 47.93676863067132,
"eval_cer": 0.006960900473933649,
"eval_loss": 0.012559423223137856,
"eval_runtime": 22.4179,
"eval_samples_per_second": 4.015,
"eval_steps_per_second": 0.045,
"eval_wer": 0.029490616621983913,
"step": 467000
},
{
"epoch": 48.03941695750359,
"grad_norm": 0.21128496527671814,
"learning_rate": 1.9646163340876362e-05,
"loss": 0.0264,
"step": 468000
},
{
"epoch": 48.03941695750359,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.01262712012976408,
"eval_runtime": 22.5251,
"eval_samples_per_second": 3.996,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 468000
},
{
"epoch": 48.142065284335864,
"grad_norm": 0.23150426149368286,
"learning_rate": 1.8617568401563465e-05,
"loss": 0.0263,
"step": 469000
},
{
"epoch": 48.142065284335864,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.012799305841326714,
"eval_runtime": 22.402,
"eval_samples_per_second": 4.018,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 469000
},
{
"epoch": 48.24471361116814,
"grad_norm": 0.23995615541934967,
"learning_rate": 1.7588973462250565e-05,
"loss": 0.0259,
"step": 470000
},
{
"epoch": 48.24471361116814,
"eval_cer": 0.006960900473933649,
"eval_loss": 0.012804466299712658,
"eval_runtime": 22.5738,
"eval_samples_per_second": 3.987,
"eval_steps_per_second": 0.044,
"eval_wer": 0.029490616621983913,
"step": 470000
},
{
"epoch": 48.34736193800041,
"grad_norm": 0.2322542518377304,
"learning_rate": 1.6560378522937665e-05,
"loss": 0.0263,
"step": 471000
},
{
"epoch": 48.34736193800041,
"eval_cer": 0.006960900473933649,
"eval_loss": 0.012536253780126572,
"eval_runtime": 22.4337,
"eval_samples_per_second": 4.012,
"eval_steps_per_second": 0.045,
"eval_wer": 0.029490616621983913,
"step": 471000
},
{
"epoch": 48.450010264832684,
"grad_norm": 0.19019187986850739,
"learning_rate": 1.553178358362477e-05,
"loss": 0.0258,
"step": 472000
},
{
"epoch": 48.450010264832684,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.012600379064679146,
"eval_runtime": 22.4073,
"eval_samples_per_second": 4.017,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 472000
},
{
"epoch": 48.55265859166496,
"grad_norm": 0.16290856897830963,
"learning_rate": 1.450318864431187e-05,
"loss": 0.0262,
"step": 473000
},
{
"epoch": 48.55265859166496,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.012459825724363327,
"eval_runtime": 22.3028,
"eval_samples_per_second": 4.035,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 473000
},
{
"epoch": 48.65530691849723,
"grad_norm": 0.20070821046829224,
"learning_rate": 1.3474593704998972e-05,
"loss": 0.0258,
"step": 474000
},
{
"epoch": 48.65530691849723,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.01252325065433979,
"eval_runtime": 22.5396,
"eval_samples_per_second": 3.993,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 474000
},
{
"epoch": 48.7579552453295,
"grad_norm": 0.21549555659294128,
"learning_rate": 1.2445998765686073e-05,
"loss": 0.026,
"step": 475000
},
{
"epoch": 48.7579552453295,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.0124691566452384,
"eval_runtime": 22.5036,
"eval_samples_per_second": 3.999,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 475000
},
{
"epoch": 48.86060357216177,
"grad_norm": 0.27654412388801575,
"learning_rate": 1.1417403826373175e-05,
"loss": 0.0265,
"step": 476000
},
{
"epoch": 48.86060357216177,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012546148151159286,
"eval_runtime": 22.6064,
"eval_samples_per_second": 3.981,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 476000
},
{
"epoch": 48.963251898994045,
"grad_norm": 0.1903195083141327,
"learning_rate": 1.0388808887060275e-05,
"loss": 0.0262,
"step": 477000
},
{
"epoch": 48.963251898994045,
"eval_cer": 0.00740521327014218,
"eval_loss": 0.012493513524532318,
"eval_runtime": 22.4828,
"eval_samples_per_second": 4.003,
"eval_steps_per_second": 0.044,
"eval_wer": 0.032171581769437,
"step": 477000
},
{
"epoch": 49.06590022582632,
"grad_norm": 0.2395348995923996,
"learning_rate": 9.360213947747377e-06,
"loss": 0.0261,
"step": 478000
},
{
"epoch": 49.06590022582632,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012535948306322098,
"eval_runtime": 22.8324,
"eval_samples_per_second": 3.942,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 478000
},
{
"epoch": 49.16854855265859,
"grad_norm": 0.20346806943416595,
"learning_rate": 8.33161900843448e-06,
"loss": 0.0259,
"step": 479000
},
{
"epoch": 49.16854855265859,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012490322813391685,
"eval_runtime": 22.7561,
"eval_samples_per_second": 3.955,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 479000
},
{
"epoch": 49.271196879490866,
"grad_norm": 0.18711692094802856,
"learning_rate": 7.30302406912158e-06,
"loss": 0.0261,
"step": 480000
},
{
"epoch": 49.271196879490866,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.01254476048052311,
"eval_runtime": 22.575,
"eval_samples_per_second": 3.987,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 480000
},
{
"epoch": 49.37384520632314,
"grad_norm": 0.19651705026626587,
"learning_rate": 6.274429129808681e-06,
"loss": 0.0257,
"step": 481000
},
{
"epoch": 49.37384520632314,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.012482382357120514,
"eval_runtime": 22.6241,
"eval_samples_per_second": 3.978,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 481000
},
{
"epoch": 49.47649353315541,
"grad_norm": 0.1909824162721634,
"learning_rate": 5.245834190495783e-06,
"loss": 0.0261,
"step": 482000
},
{
"epoch": 49.47649353315541,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.012513377703726292,
"eval_runtime": 22.706,
"eval_samples_per_second": 3.964,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 482000
},
{
"epoch": 49.57914185998768,
"grad_norm": 0.18398302793502808,
"learning_rate": 4.217239251182884e-06,
"loss": 0.0262,
"step": 483000
},
{
"epoch": 49.57914185998768,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012494869530200958,
"eval_runtime": 22.545,
"eval_samples_per_second": 3.992,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 483000
},
{
"epoch": 49.68179018681995,
"grad_norm": 0.2315966933965683,
"learning_rate": 3.1886443118699856e-06,
"loss": 0.0255,
"step": 484000
},
{
"epoch": 49.68179018681995,
"eval_cer": 0.007257109004739337,
"eval_loss": 0.012507443316280842,
"eval_runtime": 22.6822,
"eval_samples_per_second": 3.968,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03127792672028597,
"step": 484000
},
{
"epoch": 49.784438513652226,
"grad_norm": 0.16276974976062775,
"learning_rate": 2.1600493725570872e-06,
"loss": 0.0258,
"step": 485000
},
{
"epoch": 49.784438513652226,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.012460754252970219,
"eval_runtime": 22.3925,
"eval_samples_per_second": 4.019,
"eval_steps_per_second": 0.045,
"eval_wer": 0.03038427167113494,
"step": 485000
},
{
"epoch": 49.8870868404845,
"grad_norm": 0.16273947060108185,
"learning_rate": 1.1314544332441885e-06,
"loss": 0.0261,
"step": 486000
},
{
"epoch": 49.8870868404845,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.012488246895372868,
"eval_runtime": 22.5881,
"eval_samples_per_second": 3.984,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 486000
},
{
"epoch": 49.98973516731677,
"grad_norm": 0.2636996805667877,
"learning_rate": 1.0285949393128985e-07,
"loss": 0.0263,
"step": 487000
},
{
"epoch": 49.98973516731677,
"eval_cer": 0.0071090047393364926,
"eval_loss": 0.01250074990093708,
"eval_runtime": 22.7073,
"eval_samples_per_second": 3.963,
"eval_steps_per_second": 0.044,
"eval_wer": 0.03038427167113494,
"step": 487000
}
],
"logging_steps": 1000,
"max_steps": 487100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.703420432200581e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}