|
{
"best_metric": 0.012459825724363327,
"best_model_checkpoint": "./phase3-30-ep/checkpoint-473000",
"epoch": 50.0,
"eval_steps": 1000,
"global_step": 487100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
|
{ |
|
"epoch": 0.10264832683227264, |
|
"grad_norm": 0.17517703771591187, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1055, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.10264832683227264, |
|
"eval_cer": 0.029768957345971563, |
|
"eval_loss": 0.07166381180286407, |
|
"eval_runtime": 21.4774, |
|
"eval_samples_per_second": 4.19, |
|
"eval_steps_per_second": 0.047, |
|
"eval_wer": 0.11796246648793565, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.20529665366454528, |
|
"grad_norm": 0.18113084137439728, |
|
"learning_rate": 0.0004989714050606871, |
|
"loss": 0.0829, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.20529665366454528, |
|
"eval_cer": 0.02754739336492891, |
|
"eval_loss": 0.06128456071019173, |
|
"eval_runtime": 24.2215, |
|
"eval_samples_per_second": 3.716, |
|
"eval_steps_per_second": 0.041, |
|
"eval_wer": 0.10455764075067024, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3079449804968179, |
|
"grad_norm": 0.31132909655570984, |
|
"learning_rate": 0.0004979428101213742, |
|
"loss": 0.077, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3079449804968179, |
|
"eval_cer": 0.026954976303317536, |
|
"eval_loss": 0.056063100695610046, |
|
"eval_runtime": 24.595, |
|
"eval_samples_per_second": 3.659, |
|
"eval_steps_per_second": 0.041, |
|
"eval_wer": 0.10187667560321716, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.41059330732909055, |
|
"grad_norm": 0.2209460735321045, |
|
"learning_rate": 0.0004969142151820613, |
|
"loss": 0.0746, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41059330732909055, |
|
"eval_cer": 0.023548578199052133, |
|
"eval_loss": 0.05421268939971924, |
|
"eval_runtime": 39.13, |
|
"eval_samples_per_second": 2.3, |
|
"eval_steps_per_second": 0.026, |
|
"eval_wer": 0.09204647006255585, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5132416341613631, |
|
"grad_norm": 0.21206562221050262, |
|
"learning_rate": 0.0004958856202427484, |
|
"loss": 0.0723, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5132416341613631, |
|
"eval_cer": 0.023548578199052133, |
|
"eval_loss": 0.053859543055295944, |
|
"eval_runtime": 39.8671, |
|
"eval_samples_per_second": 2.258, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.0902591599642538, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6158899609936358, |
|
"grad_norm": 0.1705954223871231, |
|
"learning_rate": 0.0004948570253034355, |
|
"loss": 0.0705, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6158899609936358, |
|
"eval_cer": 0.0231042654028436, |
|
"eval_loss": 0.05038898065686226, |
|
"eval_runtime": 23.9921, |
|
"eval_samples_per_second": 3.751, |
|
"eval_steps_per_second": 0.042, |
|
"eval_wer": 0.08936550491510277, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7185382878259085, |
|
"grad_norm": 0.24641267955303192, |
|
"learning_rate": 0.0004938284303641226, |
|
"loss": 0.0693, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7185382878259085, |
|
"eval_cer": 0.02428909952606635, |
|
"eval_loss": 0.04804808273911476, |
|
"eval_runtime": 25.4073, |
|
"eval_samples_per_second": 3.542, |
|
"eval_steps_per_second": 0.039, |
|
"eval_wer": 0.09204647006255585, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8211866146581811, |
|
"grad_norm": 0.14618875086307526, |
|
"learning_rate": 0.0004927998354248098, |
|
"loss": 0.0687, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8211866146581811, |
|
"eval_cer": 0.022067535545023696, |
|
"eval_loss": 0.046750105917453766, |
|
"eval_runtime": 23.0625, |
|
"eval_samples_per_second": 3.902, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.08310991957104558, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9238349414904538, |
|
"grad_norm": 0.2242618203163147, |
|
"learning_rate": 0.0004917712404854969, |
|
"loss": 0.0668, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9238349414904538, |
|
"eval_cer": 0.02177132701421801, |
|
"eval_loss": 0.046149224042892456, |
|
"eval_runtime": 23.9314, |
|
"eval_samples_per_second": 3.761, |
|
"eval_steps_per_second": 0.042, |
|
"eval_wer": 0.08489722966934764, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.0264832683227263, |
|
"grad_norm": 0.17396153509616852, |
|
"learning_rate": 0.000490742645546184, |
|
"loss": 0.066, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0264832683227263, |
|
"eval_cer": 0.022363744075829382, |
|
"eval_loss": 0.044813916087150574, |
|
"eval_runtime": 28.3612, |
|
"eval_samples_per_second": 3.173, |
|
"eval_steps_per_second": 0.035, |
|
"eval_wer": 0.08579088471849866, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.129131595154999, |
|
"grad_norm": 0.24261055886745453, |
|
"learning_rate": 0.000489714050606871, |
|
"loss": 0.0632, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.129131595154999, |
|
"eval_cer": 0.02295616113744076, |
|
"eval_loss": 0.04648038372397423, |
|
"eval_runtime": 36.4007, |
|
"eval_samples_per_second": 2.472, |
|
"eval_steps_per_second": 0.027, |
|
"eval_wer": 0.08757819481680071, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.2317799219872716, |
|
"grad_norm": 0.20607537031173706, |
|
"learning_rate": 0.0004886854556675581, |
|
"loss": 0.0639, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.2317799219872716, |
|
"eval_cer": 0.021475118483412322, |
|
"eval_loss": 0.04516833648085594, |
|
"eval_runtime": 40.6247, |
|
"eval_samples_per_second": 2.215, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.0840035746201966, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.3344282488195442, |
|
"grad_norm": 0.2226237952709198, |
|
"learning_rate": 0.00048765686072824524, |
|
"loss": 0.0626, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.3344282488195442, |
|
"eval_cer": 0.022363744075829382, |
|
"eval_loss": 0.04331167787313461, |
|
"eval_runtime": 43.1244, |
|
"eval_samples_per_second": 2.087, |
|
"eval_steps_per_second": 0.023, |
|
"eval_wer": 0.08668453976764968, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.437076575651817, |
|
"grad_norm": 0.22998760640621185, |
|
"learning_rate": 0.00048662826578893233, |
|
"loss": 0.0617, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.437076575651817, |
|
"eval_cer": 0.020438388625592416, |
|
"eval_loss": 0.0439009889960289, |
|
"eval_runtime": 40.051, |
|
"eval_samples_per_second": 2.247, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.08132260947274352, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.5397249024840896, |
|
"grad_norm": 0.2044006586074829, |
|
"learning_rate": 0.0004855996708496194, |
|
"loss": 0.0612, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.5397249024840896, |
|
"eval_cer": 0.018364928909952605, |
|
"eval_loss": 0.039780329912900925, |
|
"eval_runtime": 40.1221, |
|
"eval_samples_per_second": 2.243, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.642373229316362, |
|
"grad_norm": 0.25967568159103394, |
|
"learning_rate": 0.0004845710759103065, |
|
"loss": 0.0619, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.642373229316362, |
|
"eval_cer": 0.021178909952606635, |
|
"eval_loss": 0.04091305658221245, |
|
"eval_runtime": 39.0877, |
|
"eval_samples_per_second": 2.303, |
|
"eval_steps_per_second": 0.026, |
|
"eval_wer": 0.0777479892761394, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.7450215561486346, |
|
"grad_norm": 0.17572972178459167, |
|
"learning_rate": 0.0004835424809709936, |
|
"loss": 0.0617, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.7450215561486346, |
|
"eval_cer": 0.021475118483412322, |
|
"eval_loss": 0.04012183099985123, |
|
"eval_runtime": 39.7698, |
|
"eval_samples_per_second": 2.263, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.07864164432529044, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.8476698829809073, |
|
"grad_norm": 0.20715534687042236, |
|
"learning_rate": 0.0004825138860316807, |
|
"loss": 0.0607, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.8476698829809073, |
|
"eval_cer": 0.02177132701421801, |
|
"eval_loss": 0.04150845482945442, |
|
"eval_runtime": 39.4055, |
|
"eval_samples_per_second": 2.284, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.08132260947274352, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.95031820981318, |
|
"grad_norm": 0.3426735997200012, |
|
"learning_rate": 0.00048148529109236785, |
|
"loss": 0.0602, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.95031820981318, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.03899341821670532, |
|
"eval_runtime": 39.6568, |
|
"eval_samples_per_second": 2.269, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.0529665366454526, |
|
"grad_norm": 0.15929488837718964, |
|
"learning_rate": 0.00048045669615305494, |
|
"loss": 0.0585, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.0529665366454526, |
|
"eval_cer": 0.019994075829383885, |
|
"eval_loss": 0.03957120701670647, |
|
"eval_runtime": 24.5545, |
|
"eval_samples_per_second": 3.665, |
|
"eval_steps_per_second": 0.041, |
|
"eval_wer": 0.07506702412868632, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.1556148634777252, |
|
"grad_norm": 0.18062791228294373, |
|
"learning_rate": 0.00047942810121374204, |
|
"loss": 0.0579, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.1556148634777252, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.039500825107097626, |
|
"eval_runtime": 21.8989, |
|
"eval_samples_per_second": 4.11, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.258263190309998, |
|
"grad_norm": 0.20961548388004303, |
|
"learning_rate": 0.00047839950627442913, |
|
"loss": 0.0571, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.258263190309998, |
|
"eval_cer": 0.01925355450236967, |
|
"eval_loss": 0.04062485322356224, |
|
"eval_runtime": 23.0215, |
|
"eval_samples_per_second": 3.909, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.3609115171422705, |
|
"grad_norm": 0.1982312947511673, |
|
"learning_rate": 0.0004773709113351162, |
|
"loss": 0.0574, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.3609115171422705, |
|
"eval_cer": 0.0173281990521327, |
|
"eval_loss": 0.03924456238746643, |
|
"eval_runtime": 26.6274, |
|
"eval_samples_per_second": 3.38, |
|
"eval_steps_per_second": 0.038, |
|
"eval_wer": 0.064343163538874, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.463559843974543, |
|
"grad_norm": 0.26111695170402527, |
|
"learning_rate": 0.0004763423163958033, |
|
"loss": 0.0568, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.463559843974543, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.038703735917806625, |
|
"eval_runtime": 23.8875, |
|
"eval_samples_per_second": 3.768, |
|
"eval_steps_per_second": 0.042, |
|
"eval_wer": 0.0679177837354781, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.566208170806816, |
|
"grad_norm": 0.20790116488933563, |
|
"learning_rate": 0.0004753137214564904, |
|
"loss": 0.0571, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.566208170806816, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.03755784407258034, |
|
"eval_runtime": 22.7427, |
|
"eval_samples_per_second": 3.957, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.064343163538874, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.6688564976390885, |
|
"grad_norm": 0.16015666723251343, |
|
"learning_rate": 0.00047428512651717756, |
|
"loss": 0.0572, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.6688564976390885, |
|
"eval_cer": 0.017920616113744077, |
|
"eval_loss": 0.03704160824418068, |
|
"eval_runtime": 22.6235, |
|
"eval_samples_per_second": 3.978, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.771504824471361, |
|
"grad_norm": 0.17609256505966187, |
|
"learning_rate": 0.00047325653157786465, |
|
"loss": 0.0566, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.771504824471361, |
|
"eval_cer": 0.01851303317535545, |
|
"eval_loss": 0.03628876060247421, |
|
"eval_runtime": 22.4829, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.874153151303634, |
|
"grad_norm": 0.19802771508693695, |
|
"learning_rate": 0.00047222793663855174, |
|
"loss": 0.0568, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.874153151303634, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.03549469634890556, |
|
"eval_runtime": 22.5619, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.9768014781359065, |
|
"grad_norm": 0.19432678818702698, |
|
"learning_rate": 0.00047119934169923884, |
|
"loss": 0.056, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.9768014781359065, |
|
"eval_cer": 0.017180094786729858, |
|
"eval_loss": 0.03459760919213295, |
|
"eval_runtime": 22.4771, |
|
"eval_samples_per_second": 4.004, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.079449804968179, |
|
"grad_norm": 0.17181651294231415, |
|
"learning_rate": 0.00047017074675992593, |
|
"loss": 0.0546, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.079449804968179, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.03514665365219116, |
|
"eval_runtime": 23.0403, |
|
"eval_samples_per_second": 3.906, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.1820981318004518, |
|
"grad_norm": 0.29426151514053345, |
|
"learning_rate": 0.000469142151820613, |
|
"loss": 0.0545, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.1820981318004518, |
|
"eval_cer": 0.018216824644549764, |
|
"eval_loss": 0.03719107061624527, |
|
"eval_runtime": 24.2578, |
|
"eval_samples_per_second": 3.71, |
|
"eval_steps_per_second": 0.041, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.2847464586327244, |
|
"grad_norm": 0.14310035109519958, |
|
"learning_rate": 0.0004681135568813001, |
|
"loss": 0.0533, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.2847464586327244, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.03583410009741783, |
|
"eval_runtime": 22.3344, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.387394785464997, |
|
"grad_norm": 0.16393882036209106, |
|
"learning_rate": 0.00046708496194198726, |
|
"loss": 0.0537, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.387394785464997, |
|
"eval_cer": 0.017476303317535545, |
|
"eval_loss": 0.03669163957238197, |
|
"eval_runtime": 22.2397, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.4900431122972697, |
|
"grad_norm": 0.1863625943660736, |
|
"learning_rate": 0.00046605636700267436, |
|
"loss": 0.0542, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.4900431122972697, |
|
"eval_cer": 0.01762440758293839, |
|
"eval_loss": 0.03613027185201645, |
|
"eval_runtime": 22.4068, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.592691439129542, |
|
"grad_norm": 0.1313330978155136, |
|
"learning_rate": 0.00046502777206336145, |
|
"loss": 0.0536, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.592691439129542, |
|
"eval_cer": 0.018216824644549764, |
|
"eval_loss": 0.03634100779891014, |
|
"eval_runtime": 22.5099, |
|
"eval_samples_per_second": 3.998, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.6953397659618146, |
|
"grad_norm": 0.15501771867275238, |
|
"learning_rate": 0.00046399917712404854, |
|
"loss": 0.0541, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.6953397659618146, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.03412258252501488, |
|
"eval_runtime": 22.4232, |
|
"eval_samples_per_second": 4.014, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.7979880927940872, |
|
"grad_norm": 0.1870546042919159, |
|
"learning_rate": 0.00046297058218473564, |
|
"loss": 0.0538, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.7979880927940872, |
|
"eval_cer": 0.01762440758293839, |
|
"eval_loss": 0.03531961515545845, |
|
"eval_runtime": 22.4977, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.90063641962636, |
|
"grad_norm": 0.1889723688364029, |
|
"learning_rate": 0.00046194198724542273, |
|
"loss": 0.054, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.90063641962636, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.03371370583772659, |
|
"eval_runtime": 24.169, |
|
"eval_samples_per_second": 3.724, |
|
"eval_steps_per_second": 0.041, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.0032847464586325, |
|
"grad_norm": 0.17405888438224792, |
|
"learning_rate": 0.0004609133923061098, |
|
"loss": 0.0535, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.0032847464586325, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.03488326445221901, |
|
"eval_runtime": 23.3428, |
|
"eval_samples_per_second": 3.856, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.105933073290905, |
|
"grad_norm": 0.21957945823669434, |
|
"learning_rate": 0.00045988479736679697, |
|
"loss": 0.0519, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.105933073290905, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.03299200162291527, |
|
"eval_runtime": 22.9291, |
|
"eval_samples_per_second": 3.925, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.208581400123178, |
|
"grad_norm": 0.1985115259885788, |
|
"learning_rate": 0.00045885620242748406, |
|
"loss": 0.0513, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.208581400123178, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.0352088063955307, |
|
"eval_runtime": 22.3573, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.3112297269554505, |
|
"grad_norm": 0.2313787192106247, |
|
"learning_rate": 0.00045782760748817116, |
|
"loss": 0.0515, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.3112297269554505, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.03440188989043236, |
|
"eval_runtime": 23.1819, |
|
"eval_samples_per_second": 3.882, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.413878053787723, |
|
"grad_norm": 0.14888563752174377, |
|
"learning_rate": 0.00045679901254885825, |
|
"loss": 0.0512, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.413878053787723, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.03430500999093056, |
|
"eval_runtime": 22.7046, |
|
"eval_samples_per_second": 3.964, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.516526380619996, |
|
"grad_norm": 0.1658962070941925, |
|
"learning_rate": 0.00045577041760954534, |
|
"loss": 0.0513, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.516526380619996, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.03472462296485901, |
|
"eval_runtime": 22.7468, |
|
"eval_samples_per_second": 3.957, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.619174707452268, |
|
"grad_norm": 0.2193230837583542, |
|
"learning_rate": 0.00045474182267023244, |
|
"loss": 0.0516, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.619174707452268, |
|
"eval_cer": 0.015550947867298577, |
|
"eval_loss": 0.03404483199119568, |
|
"eval_runtime": 22.3387, |
|
"eval_samples_per_second": 4.029, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.721823034284541, |
|
"grad_norm": 0.2104436755180359, |
|
"learning_rate": 0.00045371322773091953, |
|
"loss": 0.0515, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.721823034284541, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.033337026834487915, |
|
"eval_runtime": 22.8456, |
|
"eval_samples_per_second": 3.939, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.824471361116814, |
|
"grad_norm": 0.18940144777297974, |
|
"learning_rate": 0.0004526846327916067, |
|
"loss": 0.0512, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.824471361116814, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.03273012861609459, |
|
"eval_runtime": 22.7058, |
|
"eval_samples_per_second": 3.964, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.927119687949086, |
|
"grad_norm": 0.1933116912841797, |
|
"learning_rate": 0.00045165603785229377, |
|
"loss": 0.0517, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.927119687949086, |
|
"eval_cer": 0.01525473933649289, |
|
"eval_loss": 0.03291744366288185, |
|
"eval_runtime": 22.6758, |
|
"eval_samples_per_second": 3.969, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.029768014781359, |
|
"grad_norm": 0.18987509608268738, |
|
"learning_rate": 0.00045062744291298086, |
|
"loss": 0.0508, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.029768014781359, |
|
"eval_cer": 0.017180094786729858, |
|
"eval_loss": 0.03271958604454994, |
|
"eval_runtime": 22.4963, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.132416341613632, |
|
"grad_norm": 0.1620320975780487, |
|
"learning_rate": 0.00044959884797366796, |
|
"loss": 0.0491, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.132416341613632, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.03121078759431839, |
|
"eval_runtime": 22.8979, |
|
"eval_samples_per_second": 3.93, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.235064668445904, |
|
"grad_norm": 0.1285402774810791, |
|
"learning_rate": 0.00044857025303435505, |
|
"loss": 0.0493, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.235064668445904, |
|
"eval_cer": 0.015550947867298577, |
|
"eval_loss": 0.03293353319168091, |
|
"eval_runtime": 22.5976, |
|
"eval_samples_per_second": 3.983, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.337712995278177, |
|
"grad_norm": 0.24566827714443207, |
|
"learning_rate": 0.00044754165809504214, |
|
"loss": 0.0498, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.337712995278177, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.030513830482959747, |
|
"eval_runtime": 22.5054, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.44036132211045, |
|
"grad_norm": 0.18935276567935944, |
|
"learning_rate": 0.00044651306315572923, |
|
"loss": 0.0498, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.44036132211045, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.03185874596238136, |
|
"eval_runtime": 22.2794, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.543009648942722, |
|
"grad_norm": 0.3019377291202545, |
|
"learning_rate": 0.0004454844682164164, |
|
"loss": 0.0498, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 5.543009648942722, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.03230896592140198, |
|
"eval_runtime": 22.4205, |
|
"eval_samples_per_second": 4.014, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 5.645657975774995, |
|
"grad_norm": 0.19573438167572021, |
|
"learning_rate": 0.0004444558732771035, |
|
"loss": 0.0499, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.645657975774995, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.03094463422894478, |
|
"eval_runtime": 22.3739, |
|
"eval_samples_per_second": 4.023, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.748306302607268, |
|
"grad_norm": 0.19702386856079102, |
|
"learning_rate": 0.00044342727833779057, |
|
"loss": 0.0496, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.748306302607268, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.031046954914927483, |
|
"eval_runtime": 22.342, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.85095462943954, |
|
"grad_norm": 0.21981871128082275, |
|
"learning_rate": 0.00044239868339847766, |
|
"loss": 0.0494, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.85095462943954, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.03140529617667198, |
|
"eval_runtime": 22.392, |
|
"eval_samples_per_second": 4.019, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.953602956271813, |
|
"grad_norm": 0.1707638055086136, |
|
"learning_rate": 0.00044137008845916475, |
|
"loss": 0.0498, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.953602956271813, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.031634390354156494, |
|
"eval_runtime": 22.7229, |
|
"eval_samples_per_second": 3.961, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 6.056251283104086, |
|
"grad_norm": 0.18458805978298187, |
|
"learning_rate": 0.00044034149351985185, |
|
"loss": 0.0481, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 6.056251283104086, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.03125843033194542, |
|
"eval_runtime": 22.6851, |
|
"eval_samples_per_second": 3.967, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 6.158899609936358, |
|
"grad_norm": 0.176268070936203, |
|
"learning_rate": 0.00043931289858053894, |
|
"loss": 0.048, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.158899609936358, |
|
"eval_cer": 0.013477488151658768, |
|
"eval_loss": 0.0313909687101841, |
|
"eval_runtime": 22.4652, |
|
"eval_samples_per_second": 4.006, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.261547936768631, |
|
"grad_norm": 0.21893835067749023, |
|
"learning_rate": 0.0004382843036412261, |
|
"loss": 0.0481, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 6.261547936768631, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.030466848984360695, |
|
"eval_runtime": 22.4624, |
|
"eval_samples_per_second": 4.007, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 6.3641962636009035, |
|
"grad_norm": 0.17575185000896454, |
|
"learning_rate": 0.0004372557087019132, |
|
"loss": 0.0481, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 6.3641962636009035, |
|
"eval_cer": 0.012588862559241706, |
|
"eval_loss": 0.029415711760520935, |
|
"eval_runtime": 22.6218, |
|
"eval_samples_per_second": 3.978, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 6.466844590433176, |
|
"grad_norm": 0.21119283139705658, |
|
"learning_rate": 0.0004362271137626003, |
|
"loss": 0.0484, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 6.466844590433176, |
|
"eval_cer": 0.013329383886255925, |
|
"eval_loss": 0.030311500653624535, |
|
"eval_runtime": 22.4678, |
|
"eval_samples_per_second": 4.006, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 6.569492917265449, |
|
"grad_norm": 0.20543061196804047, |
|
"learning_rate": 0.0004351985188232874, |
|
"loss": 0.0475, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 6.569492917265449, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.030118942260742188, |
|
"eval_runtime": 22.5229, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 6.6721412440977215, |
|
"grad_norm": 0.4000137448310852, |
|
"learning_rate": 0.0004341699238839745, |
|
"loss": 0.0481, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 6.6721412440977215, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.030896518379449844, |
|
"eval_runtime": 22.5328, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 6.774789570929994, |
|
"grad_norm": 0.2505108118057251, |
|
"learning_rate": 0.0004331413289446616, |
|
"loss": 0.0486, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.774789570929994, |
|
"eval_cer": 0.012885071090047393, |
|
"eval_loss": 0.030706828460097313, |
|
"eval_runtime": 22.8491, |
|
"eval_samples_per_second": 3.939, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.877437897762267, |
|
"grad_norm": 0.1690637618303299, |
|
"learning_rate": 0.00043211273400534876, |
|
"loss": 0.0478, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.877437897762267, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.02974248118698597, |
|
"eval_runtime": 22.6356, |
|
"eval_samples_per_second": 3.976, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.980086224594539, |
|
"grad_norm": 0.2266341745853424, |
|
"learning_rate": 0.00043108413906603585, |
|
"loss": 0.0481, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.980086224594539, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.030792562291026115, |
|
"eval_runtime": 22.6232, |
|
"eval_samples_per_second": 3.978, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 7.082734551426812, |
|
"grad_norm": 0.2072857916355133, |
|
"learning_rate": 0.00043005554412672294, |
|
"loss": 0.0462, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 7.082734551426812, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.02916835993528366, |
|
"eval_runtime": 23.5494, |
|
"eval_samples_per_second": 3.822, |
|
"eval_steps_per_second": 0.042, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 7.185382878259085, |
|
"grad_norm": 0.25637751817703247, |
|
"learning_rate": 0.00042902694918741004, |
|
"loss": 0.0466, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 7.185382878259085, |
|
"eval_cer": 0.013329383886255925, |
|
"eval_loss": 0.028768625110387802, |
|
"eval_runtime": 23.0948, |
|
"eval_samples_per_second": 3.897, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 7.288031205091357, |
|
"grad_norm": 0.16115036606788635, |
|
"learning_rate": 0.00042799835424809713, |
|
"loss": 0.0465, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 7.288031205091357, |
|
"eval_cer": 0.013921800947867298, |
|
"eval_loss": 0.029431801289319992, |
|
"eval_runtime": 22.898, |
|
"eval_samples_per_second": 3.93, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 7.39067953192363, |
|
"grad_norm": 0.3072957396507263, |
|
"learning_rate": 0.0004269697593087842, |
|
"loss": 0.0464, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 7.39067953192363, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.028679879382252693, |
|
"eval_runtime": 22.6337, |
|
"eval_samples_per_second": 3.976, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 7.493327858755903, |
|
"grad_norm": 0.3598809242248535, |
|
"learning_rate": 0.0004259411643694713, |
|
"loss": 0.0472, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 7.493327858755903, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.030514726415276527, |
|
"eval_runtime": 22.6134, |
|
"eval_samples_per_second": 3.98, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 7.5959761855881744, |
|
"grad_norm": 0.24177242815494537, |
|
"learning_rate": 0.00042491256943015846, |
|
"loss": 0.0461, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 7.5959761855881744, |
|
"eval_cer": 0.013477488151658768, |
|
"eval_loss": 0.02993646450340748, |
|
"eval_runtime": 22.6695, |
|
"eval_samples_per_second": 3.97, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 7.698624512420448, |
|
"grad_norm": 0.14063900709152222, |
|
"learning_rate": 0.00042388397449084556, |
|
"loss": 0.0469, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 7.698624512420448, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.02932840585708618, |
|
"eval_runtime": 22.5476, |
|
"eval_samples_per_second": 3.992, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 7.80127283925272, |
|
"grad_norm": 0.20371408760547638, |
|
"learning_rate": 0.00042285537955153265, |
|
"loss": 0.0473, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 7.80127283925272, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.029385393485426903, |
|
"eval_runtime": 22.4143, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 7.903921166084993, |
|
"grad_norm": 0.17325064539909363, |
|
"learning_rate": 0.00042182678461221974, |
|
"loss": 0.047, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 7.903921166084993, |
|
"eval_cer": 0.012144549763033176, |
|
"eval_loss": 0.02776852808892727, |
|
"eval_runtime": 22.3852, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 8.006569492917265, |
|
"grad_norm": 0.261836975812912, |
|
"learning_rate": 0.00042079818967290683, |
|
"loss": 0.0458, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 8.006569492917265, |
|
"eval_cer": 0.01229265402843602, |
|
"eval_loss": 0.02698938362300396, |
|
"eval_runtime": 22.4901, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 8.109217819749539, |
|
"grad_norm": 0.24788102507591248, |
|
"learning_rate": 0.00041976959473359393, |
|
"loss": 0.0448, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 8.109217819749539, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.02948344498872757, |
|
"eval_runtime": 22.4348, |
|
"eval_samples_per_second": 4.012, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 8.21186614658181, |
|
"grad_norm": 0.22888223826885223, |
|
"learning_rate": 0.000418740999794281, |
|
"loss": 0.0455, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 8.21186614658181, |
|
"eval_cer": 0.012440758293838863, |
|
"eval_loss": 0.027837086468935013, |
|
"eval_runtime": 22.6805, |
|
"eval_samples_per_second": 3.968, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 8.314514473414084, |
|
"grad_norm": 0.15464870631694794, |
|
"learning_rate": 0.00041771240485496817, |
|
"loss": 0.0456, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 8.314514473414084, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.03025979734957218, |
|
"eval_runtime": 22.4815, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 8.417162800246356, |
|
"grad_norm": 0.2563960552215576, |
|
"learning_rate": 0.00041668380991565526, |
|
"loss": 0.0449, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 8.417162800246356, |
|
"eval_cer": 0.013329383886255925, |
|
"eval_loss": 0.028577908873558044, |
|
"eval_runtime": 22.508, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 8.51981112707863, |
|
"grad_norm": 0.2178841084241867, |
|
"learning_rate": 0.00041565521497634235, |
|
"loss": 0.0453, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 8.51981112707863, |
|
"eval_cer": 0.011700236966824644, |
|
"eval_loss": 0.02777865342795849, |
|
"eval_runtime": 22.4852, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 8.622459453910901, |
|
"grad_norm": 0.16487497091293335, |
|
"learning_rate": 0.00041462662003702945, |
|
"loss": 0.0452, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 8.622459453910901, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.02851984277367592, |
|
"eval_runtime": 22.544, |
|
"eval_samples_per_second": 3.992, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 8.725107780743175, |
|
"grad_norm": 0.1772727370262146, |
|
"learning_rate": 0.00041359802509771654, |
|
"loss": 0.0455, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 8.725107780743175, |
|
"eval_cer": 0.013477488151658768, |
|
"eval_loss": 0.027403153479099274, |
|
"eval_runtime": 22.2753, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 8.827756107575446, |
|
"grad_norm": 0.2657695710659027, |
|
"learning_rate": 0.00041256943015840363, |
|
"loss": 0.0454, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 8.827756107575446, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.027919290587306023, |
|
"eval_runtime": 22.8782, |
|
"eval_samples_per_second": 3.934, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 8.93040443440772, |
|
"grad_norm": 0.18787504732608795, |
|
"learning_rate": 0.00041154083521909073, |
|
"loss": 0.0455, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 8.93040443440772, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.027214767411351204, |
|
"eval_runtime": 22.7677, |
|
"eval_samples_per_second": 3.953, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 9.033052761239992, |
|
"grad_norm": 0.21097755432128906, |
|
"learning_rate": 0.0004105122402797779, |
|
"loss": 0.0451, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 9.033052761239992, |
|
"eval_cer": 0.01273696682464455, |
|
"eval_loss": 0.02636747434735298, |
|
"eval_runtime": 22.9582, |
|
"eval_samples_per_second": 3.92, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 9.135701088072265, |
|
"grad_norm": 0.17829887568950653, |
|
"learning_rate": 0.00040948364534046497, |
|
"loss": 0.0437, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 9.135701088072265, |
|
"eval_cer": 0.01110781990521327, |
|
"eval_loss": 0.026946688070893288, |
|
"eval_runtime": 22.62, |
|
"eval_samples_per_second": 3.979, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 9.238349414904537, |
|
"grad_norm": 0.1892678588628769, |
|
"learning_rate": 0.00040845505040115206, |
|
"loss": 0.0436, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 9.238349414904537, |
|
"eval_cer": 0.013329383886255925, |
|
"eval_loss": 0.0261703971773386, |
|
"eval_runtime": 22.7469, |
|
"eval_samples_per_second": 3.957, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 9.34099774173681, |
|
"grad_norm": 0.1827981173992157, |
|
"learning_rate": 0.00040742645546183915, |
|
"loss": 0.0442, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 9.34099774173681, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.026994889602065086, |
|
"eval_runtime": 22.3199, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 9.443646068569082, |
|
"grad_norm": 0.26229721307754517, |
|
"learning_rate": 0.00040639786052252625, |
|
"loss": 0.0442, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 9.443646068569082, |
|
"eval_cer": 0.012588862559241706, |
|
"eval_loss": 0.026131337508559227, |
|
"eval_runtime": 30.6242, |
|
"eval_samples_per_second": 2.939, |
|
"eval_steps_per_second": 0.033, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 9.546294395401356, |
|
"grad_norm": 0.31516391038894653, |
|
"learning_rate": 0.00040536926558321334, |
|
"loss": 0.0442, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 9.546294395401356, |
|
"eval_cer": 0.012588862559241706, |
|
"eval_loss": 0.023602332919836044, |
|
"eval_runtime": 27.5058, |
|
"eval_samples_per_second": 3.272, |
|
"eval_steps_per_second": 0.036, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 9.648942722233627, |
|
"grad_norm": 0.19427119195461273, |
|
"learning_rate": 0.00040434067064390043, |
|
"loss": 0.0443, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 9.648942722233627, |
|
"eval_cer": 0.013033175355450236, |
|
"eval_loss": 0.02646990306675434, |
|
"eval_runtime": 27.6213, |
|
"eval_samples_per_second": 3.258, |
|
"eval_steps_per_second": 0.036, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 9.751591049065901, |
|
"grad_norm": 0.26338282227516174, |
|
"learning_rate": 0.0004033120757045876, |
|
"loss": 0.0447, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 9.751591049065901, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.025444395840168, |
|
"eval_runtime": 25.0834, |
|
"eval_samples_per_second": 3.588, |
|
"eval_steps_per_second": 0.04, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 9.854239375898173, |
|
"grad_norm": 0.25808289647102356, |
|
"learning_rate": 0.0004022834807652747, |
|
"loss": 0.0445, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 9.854239375898173, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.026209862902760506, |
|
"eval_runtime": 23.4245, |
|
"eval_samples_per_second": 3.842, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 9.956887702730446, |
|
"grad_norm": 0.18842875957489014, |
|
"learning_rate": 0.00040125488582596177, |
|
"loss": 0.0445, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 9.956887702730446, |
|
"eval_cer": 0.012440758293838863, |
|
"eval_loss": 0.027286237105727196, |
|
"eval_runtime": 22.6774, |
|
"eval_samples_per_second": 3.969, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 10.059536029562718, |
|
"grad_norm": 0.2742888331413269, |
|
"learning_rate": 0.00040022629088664886, |
|
"loss": 0.0432, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 10.059536029562718, |
|
"eval_cer": 0.011404028436018957, |
|
"eval_loss": 0.027185438200831413, |
|
"eval_runtime": 27.5005, |
|
"eval_samples_per_second": 3.273, |
|
"eval_steps_per_second": 0.036, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 10.162184356394992, |
|
"grad_norm": 0.20179295539855957, |
|
"learning_rate": 0.00039919769594733595, |
|
"loss": 0.0428, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 10.162184356394992, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.02597665973007679, |
|
"eval_runtime": 25.1306, |
|
"eval_samples_per_second": 3.581, |
|
"eval_steps_per_second": 0.04, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 10.264832683227263, |
|
"grad_norm": 0.16221770644187927, |
|
"learning_rate": 0.00039816910100802305, |
|
"loss": 0.0429, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 10.264832683227263, |
|
"eval_cer": 0.011848341232227487, |
|
"eval_loss": 0.027527980506420135, |
|
"eval_runtime": 23.4033, |
|
"eval_samples_per_second": 3.846, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 10.367481010059535, |
|
"grad_norm": 0.17623300850391388, |
|
"learning_rate": 0.00039714050606871014, |
|
"loss": 0.0435, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 10.367481010059535, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.026551930233836174, |
|
"eval_runtime": 23.1192, |
|
"eval_samples_per_second": 3.893, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 10.470129336891809, |
|
"grad_norm": 0.29380717873573303, |
|
"learning_rate": 0.0003961119111293973, |
|
"loss": 0.0431, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 10.470129336891809, |
|
"eval_cer": 0.017476303317535545, |
|
"eval_loss": 0.026500999927520752, |
|
"eval_runtime": 23.0878, |
|
"eval_samples_per_second": 3.898, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 10.572777663724082, |
|
"grad_norm": 0.19392183423042297, |
|
"learning_rate": 0.0003950833161900844, |
|
"loss": 0.043, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 10.572777663724082, |
|
"eval_cer": 0.013033175355450236, |
|
"eval_loss": 0.02609255537390709, |
|
"eval_runtime": 22.8011, |
|
"eval_samples_per_second": 3.947, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 10.675425990556354, |
|
"grad_norm": 0.17925652861595154, |
|
"learning_rate": 0.0003940547212507715, |
|
"loss": 0.0433, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 10.675425990556354, |
|
"eval_cer": 0.01273696682464455, |
|
"eval_loss": 0.027248414233326912, |
|
"eval_runtime": 22.7524, |
|
"eval_samples_per_second": 3.956, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 10.778074317388626, |
|
"grad_norm": 0.21368491649627686, |
|
"learning_rate": 0.00039302612631145857, |
|
"loss": 0.0431, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 10.778074317388626, |
|
"eval_cer": 0.01273696682464455, |
|
"eval_loss": 0.025399256497621536, |
|
"eval_runtime": 22.4849, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 10.8807226442209, |
|
"grad_norm": 0.17027121782302856, |
|
"learning_rate": 0.00039199753137214566, |
|
"loss": 0.0435, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 10.8807226442209, |
|
"eval_cer": 0.01229265402843602, |
|
"eval_loss": 0.025961685925722122, |
|
"eval_runtime": 22.3736, |
|
"eval_samples_per_second": 4.023, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 10.983370971053171, |
|
"grad_norm": 0.3062898516654968, |
|
"learning_rate": 0.00039096893643283275, |
|
"loss": 0.0434, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 10.983370971053171, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.0248806644231081, |
|
"eval_runtime": 22.4291, |
|
"eval_samples_per_second": 4.013, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 11.086019297885445, |
|
"grad_norm": 0.27476412057876587, |
|
"learning_rate": 0.00038994034149351985, |
|
"loss": 0.0418, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 11.086019297885445, |
|
"eval_cer": 0.011700236966824644, |
|
"eval_loss": 0.025811193510890007, |
|
"eval_runtime": 22.3629, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 11.188667624717716, |
|
"grad_norm": 0.18025143444538116, |
|
"learning_rate": 0.000388911746554207, |
|
"loss": 0.0419, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 11.188667624717716, |
|
"eval_cer": 0.01066350710900474, |
|
"eval_loss": 0.024703815579414368, |
|
"eval_runtime": 22.2656, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 11.29131595154999, |
|
"grad_norm": 0.19146864116191864, |
|
"learning_rate": 0.0003878831516148941, |
|
"loss": 0.042, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 11.29131595154999, |
|
"eval_cer": 0.011404028436018957, |
|
"eval_loss": 0.025849131867289543, |
|
"eval_runtime": 22.6201, |
|
"eval_samples_per_second": 3.979, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 11.393964278382262, |
|
"grad_norm": 0.4587384760379791, |
|
"learning_rate": 0.0003868545566755812, |
|
"loss": 0.042, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 11.393964278382262, |
|
"eval_cer": 0.012440758293838863, |
|
"eval_loss": 0.025229139253497124, |
|
"eval_runtime": 22.5628, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 11.496612605214535, |
|
"grad_norm": 0.1752750277519226, |
|
"learning_rate": 0.0003858259617362683, |
|
"loss": 0.0428, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 11.496612605214535, |
|
"eval_cer": 0.01066350710900474, |
|
"eval_loss": 0.02614370547235012, |
|
"eval_runtime": 22.3268, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 11.599260932046807, |
|
"grad_norm": 0.20651549100875854, |
|
"learning_rate": 0.00038479736679695537, |
|
"loss": 0.0428, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 11.599260932046807, |
|
"eval_cer": 0.01110781990521327, |
|
"eval_loss": 0.02591308392584324, |
|
"eval_runtime": 22.6162, |
|
"eval_samples_per_second": 3.979, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 11.70190925887908, |
|
"grad_norm": 0.1839723438024521, |
|
"learning_rate": 0.00038376877185764246, |
|
"loss": 0.0422, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 11.70190925887908, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.026186056435108185, |
|
"eval_runtime": 22.64, |
|
"eval_samples_per_second": 3.975, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 11.804557585711352, |
|
"grad_norm": 0.1559193879365921, |
|
"learning_rate": 0.00038274017691832955, |
|
"loss": 0.0426, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 11.804557585711352, |
|
"eval_cer": 0.011996445497630332, |
|
"eval_loss": 0.026291608810424805, |
|
"eval_runtime": 23.0438, |
|
"eval_samples_per_second": 3.906, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 11.907205912543626, |
|
"grad_norm": 0.33867180347442627, |
|
"learning_rate": 0.0003817115819790167, |
|
"loss": 0.0422, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 11.907205912543626, |
|
"eval_cer": 0.011996445497630332, |
|
"eval_loss": 0.025052817538380623, |
|
"eval_runtime": 22.5162, |
|
"eval_samples_per_second": 3.997, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 12.009854239375898, |
|
"grad_norm": 0.2387935370206833, |
|
"learning_rate": 0.0003806829870397038, |
|
"loss": 0.042, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 12.009854239375898, |
|
"eval_cer": 0.011404028436018957, |
|
"eval_loss": 0.02570049837231636, |
|
"eval_runtime": 22.5888, |
|
"eval_samples_per_second": 3.984, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 12.112502566208171, |
|
"grad_norm": 0.1758970469236374, |
|
"learning_rate": 0.0003796543921003909, |
|
"loss": 0.0406, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 12.112502566208171, |
|
"eval_cer": 0.01273696682464455, |
|
"eval_loss": 0.025880787521600723, |
|
"eval_runtime": 22.5099, |
|
"eval_samples_per_second": 3.998, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 12.215150893040443, |
|
"grad_norm": 0.2268359512090683, |
|
"learning_rate": 0.000378625797161078, |
|
"loss": 0.0409, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 12.215150893040443, |
|
"eval_cer": 0.012588862559241706, |
|
"eval_loss": 0.024862516671419144, |
|
"eval_runtime": 22.7569, |
|
"eval_samples_per_second": 3.955, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 12.317799219872716, |
|
"grad_norm": 0.15519174933433533, |
|
"learning_rate": 0.0003775972022217651, |
|
"loss": 0.0412, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 12.317799219872716, |
|
"eval_cer": 0.0115521327014218, |
|
"eval_loss": 0.025986041873693466, |
|
"eval_runtime": 22.6794, |
|
"eval_samples_per_second": 3.968, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 12.420447546704988, |
|
"grad_norm": 0.3455216884613037, |
|
"learning_rate": 0.00037656860728245217, |
|
"loss": 0.0411, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 12.420447546704988, |
|
"eval_cer": 0.011700236966824644, |
|
"eval_loss": 0.025649528950452805, |
|
"eval_runtime": 22.7315, |
|
"eval_samples_per_second": 3.959, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 12.523095873537262, |
|
"grad_norm": 0.20411798357963562, |
|
"learning_rate": 0.00037554001234313926, |
|
"loss": 0.0418, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 12.523095873537262, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.02615606226027012, |
|
"eval_runtime": 22.7606, |
|
"eval_samples_per_second": 3.954, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 12.625744200369533, |
|
"grad_norm": 0.25552111864089966, |
|
"learning_rate": 0.0003745114174038264, |
|
"loss": 0.0414, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 12.625744200369533, |
|
"eval_cer": 0.01273696682464455, |
|
"eval_loss": 0.024597780779004097, |
|
"eval_runtime": 22.7241, |
|
"eval_samples_per_second": 3.961, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 12.728392527201807, |
|
"grad_norm": 0.24297872185707092, |
|
"learning_rate": 0.0003734828224645135, |
|
"loss": 0.0416, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 12.728392527201807, |
|
"eval_cer": 0.011996445497630332, |
|
"eval_loss": 0.0245036818087101, |
|
"eval_runtime": 22.3574, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 12.831040854034079, |
|
"grad_norm": 0.16708943247795105, |
|
"learning_rate": 0.0003724542275252006, |
|
"loss": 0.0421, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 12.831040854034079, |
|
"eval_cer": 0.010959715639810427, |
|
"eval_loss": 0.023803560063242912, |
|
"eval_runtime": 22.1003, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 12.933689180866352, |
|
"grad_norm": 0.2480056881904602, |
|
"learning_rate": 0.0003714256325858877, |
|
"loss": 0.0417, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 12.933689180866352, |
|
"eval_cer": 0.010959715639810427, |
|
"eval_loss": 0.02339034155011177, |
|
"eval_runtime": 22.1887, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 13.036337507698624, |
|
"grad_norm": 0.15843307971954346, |
|
"learning_rate": 0.0003703970376465748, |
|
"loss": 0.041, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 13.036337507698624, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.023780081421136856, |
|
"eval_runtime": 22.1307, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 13.138985834530898, |
|
"grad_norm": 0.17070743441581726, |
|
"learning_rate": 0.00036936844270726187, |
|
"loss": 0.0397, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 13.138985834530898, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.0229303240776062, |
|
"eval_runtime": 22.379, |
|
"eval_samples_per_second": 4.022, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 13.24163416136317, |
|
"grad_norm": 0.1719464212656021, |
|
"learning_rate": 0.00036833984776794897, |
|
"loss": 0.0405, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 13.24163416136317, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.023020587861537933, |
|
"eval_runtime": 22.2447, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 13.344282488195443, |
|
"grad_norm": 0.16199146211147308, |
|
"learning_rate": 0.0003673112528286361, |
|
"loss": 0.0405, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 13.344282488195443, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.023113010451197624, |
|
"eval_runtime": 22.549, |
|
"eval_samples_per_second": 3.991, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 13.446930815027715, |
|
"grad_norm": 0.21660035848617554, |
|
"learning_rate": 0.0003662826578893232, |
|
"loss": 0.0406, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 13.446930815027715, |
|
"eval_cer": 0.01229265402843602, |
|
"eval_loss": 0.023945845663547516, |
|
"eval_runtime": 22.3889, |
|
"eval_samples_per_second": 4.02, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 13.549579141859988, |
|
"grad_norm": 0.3124329447746277, |
|
"learning_rate": 0.0003652540629500103, |
|
"loss": 0.0406, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 13.549579141859988, |
|
"eval_cer": 0.01110781990521327, |
|
"eval_loss": 0.024438710883259773, |
|
"eval_runtime": 22.5799, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 13.65222746869226, |
|
"grad_norm": 0.16738218069076538, |
|
"learning_rate": 0.0003642254680106974, |
|
"loss": 0.0406, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 13.65222746869226, |
|
"eval_cer": 0.011848341232227487, |
|
"eval_loss": 0.024815011769533157, |
|
"eval_runtime": 22.3814, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 13.754875795524534, |
|
"grad_norm": 0.1927761733531952, |
|
"learning_rate": 0.0003631968730713845, |
|
"loss": 0.0407, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 13.754875795524534, |
|
"eval_cer": 0.010959715639810427, |
|
"eval_loss": 0.023673338815569878, |
|
"eval_runtime": 22.7336, |
|
"eval_samples_per_second": 3.959, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 13.857524122356805, |
|
"grad_norm": 0.17141355574131012, |
|
"learning_rate": 0.0003621682781320716, |
|
"loss": 0.0411, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 13.857524122356805, |
|
"eval_cer": 0.010959715639810427, |
|
"eval_loss": 0.02259986102581024, |
|
"eval_runtime": 22.6132, |
|
"eval_samples_per_second": 3.98, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 13.960172449189079, |
|
"grad_norm": 0.24508166313171387, |
|
"learning_rate": 0.00036113968319275867, |
|
"loss": 0.0405, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 13.960172449189079, |
|
"eval_cer": 0.01066350710900474, |
|
"eval_loss": 0.022703783586621284, |
|
"eval_runtime": 22.7539, |
|
"eval_samples_per_second": 3.955, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 14.06282077602135, |
|
"grad_norm": 0.16717751324176788, |
|
"learning_rate": 0.0003601110882534458, |
|
"loss": 0.0401, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 14.06282077602135, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.02189534902572632, |
|
"eval_runtime": 22.4478, |
|
"eval_samples_per_second": 4.009, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 14.165469102853624, |
|
"grad_norm": 0.42751288414001465, |
|
"learning_rate": 0.0003590824933141329, |
|
"loss": 0.0391, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 14.165469102853624, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.022342221811413765, |
|
"eval_runtime": 22.5149, |
|
"eval_samples_per_second": 3.997, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 14.268117429685896, |
|
"grad_norm": 0.2213069647550583, |
|
"learning_rate": 0.00035805389837482, |
|
"loss": 0.0394, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 14.268117429685896, |
|
"eval_cer": 0.010959715639810427, |
|
"eval_loss": 0.021898576989769936, |
|
"eval_runtime": 22.2749, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 14.37076575651817, |
|
"grad_norm": 0.17157946527004242, |
|
"learning_rate": 0.0003570253034355071, |
|
"loss": 0.0395, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 14.37076575651817, |
|
"eval_cer": 0.010811611374407584, |
|
"eval_loss": 0.023508407175540924, |
|
"eval_runtime": 22.3534, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 14.473414083350441, |
|
"grad_norm": 0.26436519622802734, |
|
"learning_rate": 0.0003559967084961942, |
|
"loss": 0.0398, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 14.473414083350441, |
|
"eval_cer": 0.011255924170616114, |
|
"eval_loss": 0.022889673709869385, |
|
"eval_runtime": 22.2676, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 14.576062410182715, |
|
"grad_norm": 0.15638813376426697, |
|
"learning_rate": 0.0003549681135568813, |
|
"loss": 0.0399, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 14.576062410182715, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.022171661257743835, |
|
"eval_runtime": 22.6709, |
|
"eval_samples_per_second": 3.97, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 14.678710737014987, |
|
"grad_norm": 0.22069737315177917, |
|
"learning_rate": 0.0003539395186175684, |
|
"loss": 0.0397, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 14.678710737014987, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.02216203510761261, |
|
"eval_runtime": 22.5252, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 14.78135906384726, |
|
"grad_norm": 0.25842490792274475, |
|
"learning_rate": 0.0003529109236782555, |
|
"loss": 0.0398, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 14.78135906384726, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.023699576035141945, |
|
"eval_runtime": 22.5737, |
|
"eval_samples_per_second": 3.987, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 14.884007390679532, |
|
"grad_norm": 0.2184634506702423, |
|
"learning_rate": 0.0003518823287389426, |
|
"loss": 0.0402, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 14.884007390679532, |
|
"eval_cer": 0.011255924170616114, |
|
"eval_loss": 0.022893035784363747, |
|
"eval_runtime": 22.3846, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 14.986655717511805, |
|
"grad_norm": 0.19810239970684052, |
|
"learning_rate": 0.0003508537337996297, |
|
"loss": 0.0403, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 14.986655717511805, |
|
"eval_cer": 0.011996445497630332, |
|
"eval_loss": 0.024159209802746773, |
|
"eval_runtime": 22.5604, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 15.089304044344077, |
|
"grad_norm": 0.2137177586555481, |
|
"learning_rate": 0.0003498251388603168, |
|
"loss": 0.0385, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 15.089304044344077, |
|
"eval_cer": 0.010811611374407584, |
|
"eval_loss": 0.022794917225837708, |
|
"eval_runtime": 22.3128, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 15.19195237117635, |
|
"grad_norm": 0.1722225844860077, |
|
"learning_rate": 0.0003487965439210039, |
|
"loss": 0.0386, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 15.19195237117635, |
|
"eval_cer": 0.011848341232227487, |
|
"eval_loss": 0.02336839959025383, |
|
"eval_runtime": 22.3982, |
|
"eval_samples_per_second": 4.018, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 15.294600698008622, |
|
"grad_norm": 0.20236076414585114, |
|
"learning_rate": 0.000347767948981691, |
|
"loss": 0.0392, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 15.294600698008622, |
|
"eval_cer": 0.01229265402843602, |
|
"eval_loss": 0.02401108108460903, |
|
"eval_runtime": 22.0603, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 15.397249024840896, |
|
"grad_norm": 0.1955161690711975, |
|
"learning_rate": 0.0003467393540423781, |
|
"loss": 0.039, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 15.397249024840896, |
|
"eval_cer": 0.010811611374407584, |
|
"eval_loss": 0.022156517952680588, |
|
"eval_runtime": 22.3641, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 15.499897351673168, |
|
"grad_norm": 0.24897447228431702, |
|
"learning_rate": 0.00034571075910306523, |
|
"loss": 0.0391, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 15.499897351673168, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.022661181166768074, |
|
"eval_runtime": 22.3484, |
|
"eval_samples_per_second": 4.027, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 15.60254567850544, |
|
"grad_norm": 0.1920953094959259, |
|
"learning_rate": 0.0003446821641637523, |
|
"loss": 0.0394, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 15.60254567850544, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.021091148257255554, |
|
"eval_runtime": 22.528, |
|
"eval_samples_per_second": 3.995, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 15.705194005337713, |
|
"grad_norm": 0.20325519144535065, |
|
"learning_rate": 0.0003436535692244394, |
|
"loss": 0.0396, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 15.705194005337713, |
|
"eval_cer": 0.010811611374407584, |
|
"eval_loss": 0.023403970524668694, |
|
"eval_runtime": 22.2476, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 15.807842332169985, |
|
"grad_norm": 0.15232166647911072, |
|
"learning_rate": 0.0003426249742851265, |
|
"loss": 0.0394, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 15.807842332169985, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.022816922515630722, |
|
"eval_runtime": 22.7231, |
|
"eval_samples_per_second": 3.961, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 15.910490659002258, |
|
"grad_norm": 0.3241395056247711, |
|
"learning_rate": 0.0003415963793458136, |
|
"loss": 0.0392, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 15.910490659002258, |
|
"eval_cer": 0.010811611374407584, |
|
"eval_loss": 0.021291887387633324, |
|
"eval_runtime": 22.2235, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 16.01313898583453, |
|
"grad_norm": 0.19988052546977997, |
|
"learning_rate": 0.0003405677844065007, |
|
"loss": 0.0393, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 16.01313898583453, |
|
"eval_cer": 0.011404028436018957, |
|
"eval_loss": 0.02226296253502369, |
|
"eval_runtime": 22.2614, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 16.115787312666804, |
|
"grad_norm": 0.23728616535663605, |
|
"learning_rate": 0.0003395391894671878, |
|
"loss": 0.0382, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 16.115787312666804, |
|
"eval_cer": 0.011255924170616114, |
|
"eval_loss": 0.021734587848186493, |
|
"eval_runtime": 22.1722, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 16.218435639499077, |
|
"grad_norm": 0.21486635506153107, |
|
"learning_rate": 0.00033851059452787494, |
|
"loss": 0.0379, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 16.218435639499077, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.02133306674659252, |
|
"eval_runtime": 22.2489, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 16.321083966331347, |
|
"grad_norm": 0.21918782591819763, |
|
"learning_rate": 0.00033748199958856203, |
|
"loss": 0.0382, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 16.321083966331347, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.022134315222501755, |
|
"eval_runtime": 22.3537, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 16.42373229316362, |
|
"grad_norm": 0.1927264928817749, |
|
"learning_rate": 0.0003364534046492491, |
|
"loss": 0.0383, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 16.42373229316362, |
|
"eval_cer": 0.0115521327014218, |
|
"eval_loss": 0.022146208211779594, |
|
"eval_runtime": 22.5984, |
|
"eval_samples_per_second": 3.983, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 16.526380619995894, |
|
"grad_norm": 0.19513466954231262, |
|
"learning_rate": 0.0003354248097099362, |
|
"loss": 0.0386, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 16.526380619995894, |
|
"eval_cer": 0.011700236966824644, |
|
"eval_loss": 0.021798642352223396, |
|
"eval_runtime": 22.2348, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 16.629028946828168, |
|
"grad_norm": 0.1991739124059677, |
|
"learning_rate": 0.0003343962147706233, |
|
"loss": 0.038, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 16.629028946828168, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.021404601633548737, |
|
"eval_runtime": 22.5257, |
|
"eval_samples_per_second": 3.995, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 16.731677273660438, |
|
"grad_norm": 0.19290116429328918, |
|
"learning_rate": 0.0003333676198313104, |
|
"loss": 0.0389, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 16.731677273660438, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.0213669091463089, |
|
"eval_runtime": 22.5781, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 16.83432560049271, |
|
"grad_norm": 0.29244861006736755, |
|
"learning_rate": 0.0003323390248919975, |
|
"loss": 0.0384, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 16.83432560049271, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.02153705060482025, |
|
"eval_runtime": 22.335, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 16.936973927324985, |
|
"grad_norm": 0.17148034274578094, |
|
"learning_rate": 0.00033131042995268465, |
|
"loss": 0.0381, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 16.936973927324985, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.022320713847875595, |
|
"eval_runtime": 22.0009, |
|
"eval_samples_per_second": 4.091, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 17.03962225415726, |
|
"grad_norm": 0.31796592473983765, |
|
"learning_rate": 0.00033028183501337174, |
|
"loss": 0.0384, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 17.03962225415726, |
|
"eval_cer": 0.01110781990521327, |
|
"eval_loss": 0.02184494584798813, |
|
"eval_runtime": 22.2781, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 17.14227058098953, |
|
"grad_norm": 0.2634246051311493, |
|
"learning_rate": 0.00032925324007405883, |
|
"loss": 0.0371, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 17.14227058098953, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.022673843428492546, |
|
"eval_runtime": 22.5449, |
|
"eval_samples_per_second": 3.992, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 17.244918907821802, |
|
"grad_norm": 0.21225817501544952, |
|
"learning_rate": 0.0003282246451347459, |
|
"loss": 0.0372, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 17.244918907821802, |
|
"eval_cer": 0.011848341232227487, |
|
"eval_loss": 0.021213963627815247, |
|
"eval_runtime": 22.3119, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 17.347567234654075, |
|
"grad_norm": 0.30099403858184814, |
|
"learning_rate": 0.000327196050195433, |
|
"loss": 0.0375, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 17.347567234654075, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.021301671862602234, |
|
"eval_runtime": 22.2137, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 17.45021556148635, |
|
"grad_norm": 0.20359040796756744, |
|
"learning_rate": 0.0003261674552561201, |
|
"loss": 0.0378, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 17.45021556148635, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.02227012813091278, |
|
"eval_runtime": 22.5554, |
|
"eval_samples_per_second": 3.99, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 17.55286388831862, |
|
"grad_norm": 0.225717231631279, |
|
"learning_rate": 0.0003251388603168072, |
|
"loss": 0.0381, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 17.55286388831862, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.02183985523879528, |
|
"eval_runtime": 22.3674, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 17.655512215150893, |
|
"grad_norm": 0.23642343282699585, |
|
"learning_rate": 0.00032411026537749435, |
|
"loss": 0.038, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 17.655512215150893, |
|
"eval_cer": 0.010071090047393365, |
|
"eval_loss": 0.02163875661790371, |
|
"eval_runtime": 22.3807, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 17.758160541983166, |
|
"grad_norm": 0.206275075674057, |
|
"learning_rate": 0.00032308167043818144, |
|
"loss": 0.0381, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 17.758160541983166, |
|
"eval_cer": 0.01110781990521327, |
|
"eval_loss": 0.021833743900060654, |
|
"eval_runtime": 22.1601, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 17.86080886881544, |
|
"grad_norm": 0.1906212568283081, |
|
"learning_rate": 0.00032205307549886854, |
|
"loss": 0.0376, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 17.86080886881544, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.0216918233782053, |
|
"eval_runtime": 22.5419, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 17.96345719564771, |
|
"grad_norm": 0.2309373915195465, |
|
"learning_rate": 0.00032102448055955563, |
|
"loss": 0.0379, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 17.96345719564771, |
|
"eval_cer": 0.01229265402843602, |
|
"eval_loss": 0.022404534742236137, |
|
"eval_runtime": 22.4205, |
|
"eval_samples_per_second": 4.014, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 18.066105522479983, |
|
"grad_norm": 0.26210764050483704, |
|
"learning_rate": 0.0003199958856202427, |
|
"loss": 0.037, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 18.066105522479983, |
|
"eval_cer": 0.009626777251184835, |
|
"eval_loss": 0.02186032012104988, |
|
"eval_runtime": 22.3427, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 18.168753849312257, |
|
"grad_norm": 0.18146245181560516, |
|
"learning_rate": 0.0003189672906809298, |
|
"loss": 0.0366, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 18.168753849312257, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.021789953112602234, |
|
"eval_runtime": 22.2303, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 18.27140217614453, |
|
"grad_norm": 0.21234826743602753, |
|
"learning_rate": 0.0003179386957416169, |
|
"loss": 0.0364, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 18.27140217614453, |
|
"eval_cer": 0.010071090047393365, |
|
"eval_loss": 0.023085610941052437, |
|
"eval_runtime": 22.5005, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 18.3740505029768, |
|
"grad_norm": 0.20181190967559814, |
|
"learning_rate": 0.00031691010080230406, |
|
"loss": 0.037, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 18.3740505029768, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.022074325010180473, |
|
"eval_runtime": 22.2124, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 18.476698829809074, |
|
"grad_norm": 0.20344142615795135, |
|
"learning_rate": 0.00031588150586299115, |
|
"loss": 0.0368, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 18.476698829809074, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.02133142203092575, |
|
"eval_runtime": 22.4401, |
|
"eval_samples_per_second": 4.011, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 18.579347156641347, |
|
"grad_norm": 0.26208797097206116, |
|
"learning_rate": 0.00031485291092367824, |
|
"loss": 0.0374, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 18.579347156641347, |
|
"eval_cer": 0.010811611374407584, |
|
"eval_loss": 0.022065425291657448, |
|
"eval_runtime": 22.2276, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 18.68199548347362, |
|
"grad_norm": 0.21672701835632324, |
|
"learning_rate": 0.00031382431598436534, |
|
"loss": 0.0378, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 18.68199548347362, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.022190110757946968, |
|
"eval_runtime": 22.4805, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 18.78464381030589, |
|
"grad_norm": 0.26024818420410156, |
|
"learning_rate": 0.00031279572104505243, |
|
"loss": 0.0373, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 18.78464381030589, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.0211643036454916, |
|
"eval_runtime": 22.2696, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 18.887292137138164, |
|
"grad_norm": 0.15596991777420044, |
|
"learning_rate": 0.0003117671261057395, |
|
"loss": 0.0374, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 18.887292137138164, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.02078518457710743, |
|
"eval_runtime": 22.3696, |
|
"eval_samples_per_second": 4.023, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 18.989940463970438, |
|
"grad_norm": 0.21325981616973877, |
|
"learning_rate": 0.0003107385311664266, |
|
"loss": 0.0377, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 18.989940463970438, |
|
"eval_cer": 0.011700236966824644, |
|
"eval_loss": 0.021274788305163383, |
|
"eval_runtime": 22.3681, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 19.09258879080271, |
|
"grad_norm": 0.2881476581096649, |
|
"learning_rate": 0.00030970993622711376, |
|
"loss": 0.0362, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 19.09258879080271, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.020506886765360832, |
|
"eval_runtime": 22.4918, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 19.19523711763498, |
|
"grad_norm": 0.2128625214099884, |
|
"learning_rate": 0.00030868134128780086, |
|
"loss": 0.036, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 19.19523711763498, |
|
"eval_cer": 0.0115521327014218, |
|
"eval_loss": 0.021428626030683517, |
|
"eval_runtime": 22.4254, |
|
"eval_samples_per_second": 4.013, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 19.297885444467255, |
|
"grad_norm": 0.20976155996322632, |
|
"learning_rate": 0.000307652746348488, |
|
"loss": 0.0367, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 19.297885444467255, |
|
"eval_cer": 0.010959715639810427, |
|
"eval_loss": 0.020786074921488762, |
|
"eval_runtime": 22.3038, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 19.40053377129953, |
|
"grad_norm": 0.21684007346630096, |
|
"learning_rate": 0.0003066241514091751, |
|
"loss": 0.0363, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 19.40053377129953, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.020662952214479446, |
|
"eval_runtime": 22.499, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 19.503182098131802, |
|
"grad_norm": 0.4317739009857178, |
|
"learning_rate": 0.0003055955564698622, |
|
"loss": 0.0365, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 19.503182098131802, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.021092107519507408, |
|
"eval_runtime": 22.4144, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 19.605830424964072, |
|
"grad_norm": 0.23220385611057281, |
|
"learning_rate": 0.0003045669615305493, |
|
"loss": 0.0364, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 19.605830424964072, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.020856238901615143, |
|
"eval_runtime": 22.4251, |
|
"eval_samples_per_second": 4.013, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 19.708478751796346, |
|
"grad_norm": 0.20682792365550995, |
|
"learning_rate": 0.0003035383665912364, |
|
"loss": 0.0369, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 19.708478751796346, |
|
"eval_cer": 0.01066350710900474, |
|
"eval_loss": 0.020980246365070343, |
|
"eval_runtime": 22.3208, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 19.81112707862862, |
|
"grad_norm": 0.160901740193367, |
|
"learning_rate": 0.0003025097716519235, |
|
"loss": 0.0366, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 19.81112707862862, |
|
"eval_cer": 0.009626777251184835, |
|
"eval_loss": 0.02076118066906929, |
|
"eval_runtime": 22.2895, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 19.913775405460893, |
|
"grad_norm": 0.25278541445732117, |
|
"learning_rate": 0.0003014811767126106, |
|
"loss": 0.0369, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 19.913775405460893, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.02101094461977482, |
|
"eval_runtime": 22.1128, |
|
"eval_samples_per_second": 4.07, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 20.016423732293163, |
|
"grad_norm": 0.18868520855903625, |
|
"learning_rate": 0.0003004525817732977, |
|
"loss": 0.0365, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 20.016423732293163, |
|
"eval_cer": 0.010071090047393365, |
|
"eval_loss": 0.021312745288014412, |
|
"eval_runtime": 22.2522, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 20.119072059125436, |
|
"grad_norm": 0.16919797658920288, |
|
"learning_rate": 0.0002994239868339848, |
|
"loss": 0.0358, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 20.119072059125436, |
|
"eval_cer": 0.009626777251184835, |
|
"eval_loss": 0.020815536379814148, |
|
"eval_runtime": 22.1691, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 20.22172038595771, |
|
"grad_norm": 0.2387053221464157, |
|
"learning_rate": 0.0002983953918946719, |
|
"loss": 0.0357, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 20.22172038595771, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.022103123366832733, |
|
"eval_runtime": 22.0878, |
|
"eval_samples_per_second": 4.075, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 20.324368712789983, |
|
"grad_norm": 0.1803978830575943, |
|
"learning_rate": 0.000297366796955359, |
|
"loss": 0.036, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 20.324368712789983, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.02122490108013153, |
|
"eval_runtime": 22.4345, |
|
"eval_samples_per_second": 4.012, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 20.427017039622253, |
|
"grad_norm": 0.30070099234580994, |
|
"learning_rate": 0.0002963382020160461, |
|
"loss": 0.0358, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 20.427017039622253, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.020686373114585876, |
|
"eval_runtime": 22.3744, |
|
"eval_samples_per_second": 4.022, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 20.529665366454527, |
|
"grad_norm": 0.20007756352424622, |
|
"learning_rate": 0.00029530960707673323, |
|
"loss": 0.0361, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 20.529665366454527, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.019611194729804993, |
|
"eval_runtime": 22.3448, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 20.6323136932868, |
|
"grad_norm": 0.2129463404417038, |
|
"learning_rate": 0.0002942810121374203, |
|
"loss": 0.0363, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 20.6323136932868, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.021008532494306564, |
|
"eval_runtime": 22.292, |
|
"eval_samples_per_second": 4.037, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 20.73496202011907, |
|
"grad_norm": 0.16587744653224945, |
|
"learning_rate": 0.0002932524171981074, |
|
"loss": 0.0359, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 20.73496202011907, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.02170945331454277, |
|
"eval_runtime": 22.2982, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 20.837610346951344, |
|
"grad_norm": 0.1517147719860077, |
|
"learning_rate": 0.0002922238222587945, |
|
"loss": 0.0362, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 20.837610346951344, |
|
"eval_cer": 0.01110781990521327, |
|
"eval_loss": 0.021431386470794678, |
|
"eval_runtime": 22.317, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 20.940258673783617, |
|
"grad_norm": 0.33937105536460876, |
|
"learning_rate": 0.0002911952273194816, |
|
"loss": 0.0359, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 20.940258673783617, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.01971680298447609, |
|
"eval_runtime": 22.523, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 21.04290700061589, |
|
"grad_norm": 0.19971835613250732, |
|
"learning_rate": 0.0002901666323801687, |
|
"loss": 0.0357, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 21.04290700061589, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.020647110417485237, |
|
"eval_runtime": 22.1889, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 21.14555532744816, |
|
"grad_norm": 0.2110970914363861, |
|
"learning_rate": 0.0002891380374408558, |
|
"loss": 0.0346, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 21.14555532744816, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.01989123784005642, |
|
"eval_runtime": 22.7934, |
|
"eval_samples_per_second": 3.949, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 21.248203654280434, |
|
"grad_norm": 0.20562046766281128, |
|
"learning_rate": 0.00028810944250154294, |
|
"loss": 0.0349, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 21.248203654280434, |
|
"eval_cer": 0.010515402843601895, |
|
"eval_loss": 0.020657481625676155, |
|
"eval_runtime": 22.7975, |
|
"eval_samples_per_second": 3.948, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 21.350851981112708, |
|
"grad_norm": 0.16318105161190033, |
|
"learning_rate": 0.00028708084756223003, |
|
"loss": 0.0353, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 21.350851981112708, |
|
"eval_cer": 0.010367298578199052, |
|
"eval_loss": 0.01860020123422146, |
|
"eval_runtime": 22.3138, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 21.45350030794498, |
|
"grad_norm": 0.2521456480026245, |
|
"learning_rate": 0.0002860522526229171, |
|
"loss": 0.0356, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 21.45350030794498, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.01932937651872635, |
|
"eval_runtime": 22.5041, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 21.55614863477725, |
|
"grad_norm": 0.15729331970214844, |
|
"learning_rate": 0.0002850236576836042, |
|
"loss": 0.0356, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 21.55614863477725, |
|
"eval_cer": 0.01066350710900474, |
|
"eval_loss": 0.01956385001540184, |
|
"eval_runtime": 22.4886, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 21.658796961609525, |
|
"grad_norm": 0.3368454575538635, |
|
"learning_rate": 0.0002839950627442913, |
|
"loss": 0.0355, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 21.658796961609525, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.017849326133728027, |
|
"eval_runtime": 22.2354, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 21.7614452884418, |
|
"grad_norm": 0.19548599421977997, |
|
"learning_rate": 0.0002829664678049784, |
|
"loss": 0.0356, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 21.7614452884418, |
|
"eval_cer": 0.010071090047393365, |
|
"eval_loss": 0.018993813544511795, |
|
"eval_runtime": 22.2405, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 21.864093615274072, |
|
"grad_norm": 0.300447553396225, |
|
"learning_rate": 0.0002819378728656655, |
|
"loss": 0.0355, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 21.864093615274072, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.018936272710561752, |
|
"eval_runtime": 22.3288, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 21.966741942106342, |
|
"grad_norm": 0.2299223691225052, |
|
"learning_rate": 0.00028090927792635264, |
|
"loss": 0.0357, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 21.966741942106342, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.017861895263195038, |
|
"eval_runtime": 22.4153, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 22.069390268938616, |
|
"grad_norm": 0.22763746976852417, |
|
"learning_rate": 0.00027988068298703974, |
|
"loss": 0.035, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 22.069390268938616, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.018531307578086853, |
|
"eval_runtime": 22.5994, |
|
"eval_samples_per_second": 3.982, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04289544235924933, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 22.17203859577089, |
|
"grad_norm": 0.17821846902370453, |
|
"learning_rate": 0.00027885208804772683, |
|
"loss": 0.0343, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 22.17203859577089, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.01801004819571972, |
|
"eval_runtime": 22.6174, |
|
"eval_samples_per_second": 3.979, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 22.274686922603163, |
|
"grad_norm": 0.20817448198795319, |
|
"learning_rate": 0.0002778234931084139, |
|
"loss": 0.0353, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 22.274686922603163, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.01922653615474701, |
|
"eval_runtime": 22.3133, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 22.377335249435433, |
|
"grad_norm": 0.23358240723609924, |
|
"learning_rate": 0.000276794898169101, |
|
"loss": 0.0347, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 22.377335249435433, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.019061286002397537, |
|
"eval_runtime": 22.516, |
|
"eval_samples_per_second": 3.997, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 22.479983576267706, |
|
"grad_norm": 0.1586250215768814, |
|
"learning_rate": 0.0002757663032297881, |
|
"loss": 0.0344, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 22.479983576267706, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.018482740968465805, |
|
"eval_runtime": 22.7441, |
|
"eval_samples_per_second": 3.957, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 22.58263190309998, |
|
"grad_norm": 0.18222519755363464, |
|
"learning_rate": 0.0002747377082904752, |
|
"loss": 0.0355, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 22.58263190309998, |
|
"eval_cer": 0.010071090047393365, |
|
"eval_loss": 0.01949753239750862, |
|
"eval_runtime": 22.5921, |
|
"eval_samples_per_second": 3.984, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0420017873100983, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 22.685280229932253, |
|
"grad_norm": 0.24508023262023926, |
|
"learning_rate": 0.00027370911335116235, |
|
"loss": 0.0351, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 22.685280229932253, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.019236860796809196, |
|
"eval_runtime": 22.1523, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 22.787928556764523, |
|
"grad_norm": 0.20518304407596588, |
|
"learning_rate": 0.00027268051841184944, |
|
"loss": 0.0349, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 22.787928556764523, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.01924031414091587, |
|
"eval_runtime": 22.4441, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 22.890576883596797, |
|
"grad_norm": 0.34271785616874695, |
|
"learning_rate": 0.00027165192347253654, |
|
"loss": 0.0354, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 22.890576883596797, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.018572239205241203, |
|
"eval_runtime": 22.2344, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 22.99322521042907, |
|
"grad_norm": 0.23716846108436584, |
|
"learning_rate": 0.00027062332853322363, |
|
"loss": 0.0351, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 22.99322521042907, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.018431993201375008, |
|
"eval_runtime": 22.1175, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 23.095873537261344, |
|
"grad_norm": 0.20996840298175812, |
|
"learning_rate": 0.0002695947335939107, |
|
"loss": 0.0336, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 23.095873537261344, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.01876632496714592, |
|
"eval_runtime": 22.1146, |
|
"eval_samples_per_second": 4.07, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 23.198521864093614, |
|
"grad_norm": 0.24316902458667755, |
|
"learning_rate": 0.0002685661386545978, |
|
"loss": 0.0342, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 23.198521864093614, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.018810244277119637, |
|
"eval_runtime": 22.534, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 23.301170190925887, |
|
"grad_norm": 0.21960946917533875, |
|
"learning_rate": 0.00026753754371528496, |
|
"loss": 0.0341, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 23.301170190925887, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.01906524784862995, |
|
"eval_runtime": 22.5098, |
|
"eval_samples_per_second": 3.998, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 23.40381851775816, |
|
"grad_norm": 0.24521832168102264, |
|
"learning_rate": 0.00026650894877597206, |
|
"loss": 0.0346, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 23.40381851775816, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.017732921987771988, |
|
"eval_runtime": 22.3346, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 23.506466844590435, |
|
"grad_norm": 0.20621488988399506, |
|
"learning_rate": 0.00026548035383665915, |
|
"loss": 0.0341, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 23.506466844590435, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.018322736024856567, |
|
"eval_runtime": 22.307, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 23.609115171422705, |
|
"grad_norm": 0.22831113636493683, |
|
"learning_rate": 0.00026445175889734624, |
|
"loss": 0.0348, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 23.609115171422705, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.018174562603235245, |
|
"eval_runtime": 22.2887, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 23.711763498254978, |
|
"grad_norm": 0.26975417137145996, |
|
"learning_rate": 0.00026342316395803334, |
|
"loss": 0.0343, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 23.711763498254978, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.017916005104780197, |
|
"eval_runtime": 22.3158, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 23.81441182508725, |
|
"grad_norm": 0.18927611410617828, |
|
"learning_rate": 0.00026239456901872043, |
|
"loss": 0.0347, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 23.81441182508725, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.018648013472557068, |
|
"eval_runtime": 22.3793, |
|
"eval_samples_per_second": 4.022, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 23.917060151919525, |
|
"grad_norm": 0.3485555350780487, |
|
"learning_rate": 0.0002613659740794075, |
|
"loss": 0.0349, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 23.917060151919525, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.018734946846961975, |
|
"eval_runtime": 22.4017, |
|
"eval_samples_per_second": 4.018, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 24.019708478751795, |
|
"grad_norm": 0.19541509449481964, |
|
"learning_rate": 0.00026033737914009467, |
|
"loss": 0.0346, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 24.019708478751795, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.01827438361942768, |
|
"eval_runtime": 22.4976, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 24.12235680558407, |
|
"grad_norm": 0.15688838064670563, |
|
"learning_rate": 0.00025930878420078176, |
|
"loss": 0.0335, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 24.12235680558407, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.01861654222011566, |
|
"eval_runtime": 22.4918, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 24.225005132416342, |
|
"grad_norm": 0.15766377747058868, |
|
"learning_rate": 0.00025828018926146886, |
|
"loss": 0.0334, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 24.225005132416342, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.017844926565885544, |
|
"eval_runtime": 22.542, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 24.327653459248616, |
|
"grad_norm": 0.27199745178222656, |
|
"learning_rate": 0.00025725159432215595, |
|
"loss": 0.0335, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 24.327653459248616, |
|
"eval_cer": 0.010219194312796208, |
|
"eval_loss": 0.018539218232035637, |
|
"eval_runtime": 22.1807, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 24.430301786080886, |
|
"grad_norm": 0.2251148819923401, |
|
"learning_rate": 0.00025622299938284304, |
|
"loss": 0.0339, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 24.430301786080886, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.018467124551534653, |
|
"eval_runtime": 22.5004, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 24.53295011291316, |
|
"grad_norm": 0.1856725960969925, |
|
"learning_rate": 0.00025519440444353014, |
|
"loss": 0.0343, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 24.53295011291316, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.01805044710636139, |
|
"eval_runtime": 22.2875, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 24.635598439745433, |
|
"grad_norm": 0.23337046802043915, |
|
"learning_rate": 0.00025416580950421723, |
|
"loss": 0.0341, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 24.635598439745433, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.018100356683135033, |
|
"eval_runtime": 22.3689, |
|
"eval_samples_per_second": 4.023, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 24.738246766577706, |
|
"grad_norm": 0.3009665012359619, |
|
"learning_rate": 0.0002531372145649044, |
|
"loss": 0.0342, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 24.738246766577706, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.017376452684402466, |
|
"eval_runtime": 22.3829, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 24.840895093409976, |
|
"grad_norm": 0.20083321630954742, |
|
"learning_rate": 0.00025210861962559147, |
|
"loss": 0.0341, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 24.840895093409976, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.018101993948221207, |
|
"eval_runtime": 22.6389, |
|
"eval_samples_per_second": 3.975, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 24.94354342024225, |
|
"grad_norm": 0.2611863315105438, |
|
"learning_rate": 0.00025108002468627856, |
|
"loss": 0.034, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 24.94354342024225, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.01786319725215435, |
|
"eval_runtime": 22.5343, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 25.046191747074523, |
|
"grad_norm": 0.22002221643924713, |
|
"learning_rate": 0.00025005142974696566, |
|
"loss": 0.0337, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 25.046191747074523, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.01813172735273838, |
|
"eval_runtime": 22.7449, |
|
"eval_samples_per_second": 3.957, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 25.148840073906797, |
|
"grad_norm": 0.22292836010456085, |
|
"learning_rate": 0.00024902283480765275, |
|
"loss": 0.0331, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 25.148840073906797, |
|
"eval_cer": 0.010071090047393365, |
|
"eval_loss": 0.018383100628852844, |
|
"eval_runtime": 22.3583, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 25.251488400739067, |
|
"grad_norm": 0.22255383431911469, |
|
"learning_rate": 0.00024799423986833984, |
|
"loss": 0.0332, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 25.251488400739067, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.018330469727516174, |
|
"eval_runtime": 22.5685, |
|
"eval_samples_per_second": 3.988, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.041108132260947276, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 25.35413672757134, |
|
"grad_norm": 0.16578556597232819, |
|
"learning_rate": 0.00024696564492902694, |
|
"loss": 0.0335, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 25.35413672757134, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.017710883170366287, |
|
"eval_runtime": 22.2874, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 25.456785054403614, |
|
"grad_norm": 0.24286945164203644, |
|
"learning_rate": 0.0002459370499897141, |
|
"loss": 0.0336, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 25.456785054403614, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.017551274970173836, |
|
"eval_runtime": 22.5028, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 25.559433381235884, |
|
"grad_norm": 0.2211551070213318, |
|
"learning_rate": 0.0002449084550504012, |
|
"loss": 0.0334, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 25.559433381235884, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.017883356660604477, |
|
"eval_runtime": 22.5913, |
|
"eval_samples_per_second": 3.984, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 25.662081708068158, |
|
"grad_norm": 0.22908490896224976, |
|
"learning_rate": 0.00024387986011108827, |
|
"loss": 0.0336, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 25.662081708068158, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.01878712698817253, |
|
"eval_runtime": 22.5067, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 25.76473003490043, |
|
"grad_norm": 0.23008745908737183, |
|
"learning_rate": 0.00024285126517177536, |
|
"loss": 0.0337, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 25.76473003490043, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.018499523401260376, |
|
"eval_runtime": 22.5329, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 25.867378361732705, |
|
"grad_norm": 0.197307750582695, |
|
"learning_rate": 0.00024182267023246246, |
|
"loss": 0.0337, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 25.867378361732705, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01780012436211109, |
|
"eval_runtime": 22.5224, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 25.970026688564978, |
|
"grad_norm": 0.24368754029273987, |
|
"learning_rate": 0.00024079407529314955, |
|
"loss": 0.0338, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 25.970026688564978, |
|
"eval_cer": 0.009626777251184835, |
|
"eval_loss": 0.017333028838038445, |
|
"eval_runtime": 22.4863, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 26.072675015397248, |
|
"grad_norm": 0.17760606110095978, |
|
"learning_rate": 0.00023976548035383667, |
|
"loss": 0.0328, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 26.072675015397248, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.017533306032419205, |
|
"eval_runtime": 22.6165, |
|
"eval_samples_per_second": 3.979, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 26.17532334222952, |
|
"grad_norm": 0.24285702407360077, |
|
"learning_rate": 0.00023873688541452376, |
|
"loss": 0.0325, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 26.17532334222952, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.01821918785572052, |
|
"eval_runtime": 22.4982, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 26.277971669061795, |
|
"grad_norm": 0.216440811753273, |
|
"learning_rate": 0.00023770829047521086, |
|
"loss": 0.0328, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 26.277971669061795, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.01876477338373661, |
|
"eval_runtime": 22.6643, |
|
"eval_samples_per_second": 3.971, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 26.380619995894065, |
|
"grad_norm": 0.18202808499336243, |
|
"learning_rate": 0.00023667969553589798, |
|
"loss": 0.0327, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 26.380619995894065, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.017915373668074608, |
|
"eval_runtime": 22.5776, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 26.48326832272634, |
|
"grad_norm": 0.183961883187294, |
|
"learning_rate": 0.00023565110059658507, |
|
"loss": 0.0331, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 26.48326832272634, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.018149225041270256, |
|
"eval_runtime": 22.4894, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 26.585916649558612, |
|
"grad_norm": 0.2554219365119934, |
|
"learning_rate": 0.00023462250565727216, |
|
"loss": 0.033, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 26.585916649558612, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.017295319586992264, |
|
"eval_runtime": 22.4744, |
|
"eval_samples_per_second": 4.005, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 26.688564976390886, |
|
"grad_norm": 0.3923017978668213, |
|
"learning_rate": 0.00023359391071795926, |
|
"loss": 0.0334, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 26.688564976390886, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.018423665314912796, |
|
"eval_runtime": 22.6112, |
|
"eval_samples_per_second": 3.98, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 26.791213303223156, |
|
"grad_norm": 0.413510262966156, |
|
"learning_rate": 0.00023256531577864638, |
|
"loss": 0.0334, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 26.791213303223156, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.017927566543221474, |
|
"eval_runtime": 22.5258, |
|
"eval_samples_per_second": 3.995, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 26.89386163005543, |
|
"grad_norm": 0.25007760524749756, |
|
"learning_rate": 0.00023153672083933347, |
|
"loss": 0.0333, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 26.89386163005543, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.017284687608480453, |
|
"eval_runtime": 22.5825, |
|
"eval_samples_per_second": 3.985, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 26.996509956887703, |
|
"grad_norm": 0.3856132924556732, |
|
"learning_rate": 0.00023050812590002056, |
|
"loss": 0.0336, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 26.996509956887703, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.017093736678361893, |
|
"eval_runtime": 22.7688, |
|
"eval_samples_per_second": 3.953, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 27.099158283719976, |
|
"grad_norm": 0.3299943208694458, |
|
"learning_rate": 0.00022947953096070768, |
|
"loss": 0.0321, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 27.099158283719976, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.017240121960639954, |
|
"eval_runtime": 22.5916, |
|
"eval_samples_per_second": 3.984, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 27.201806610552246, |
|
"grad_norm": 0.1730368584394455, |
|
"learning_rate": 0.00022845093602139478, |
|
"loss": 0.0327, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 27.201806610552246, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.01783335767686367, |
|
"eval_runtime": 22.2378, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 27.30445493738452, |
|
"grad_norm": 0.3776761293411255, |
|
"learning_rate": 0.00022742234108208187, |
|
"loss": 0.0326, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 27.30445493738452, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.017694596201181412, |
|
"eval_runtime": 22.5643, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.040214477211796246, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 27.407103264216794, |
|
"grad_norm": 0.17738159000873566, |
|
"learning_rate": 0.00022639374614276896, |
|
"loss": 0.0324, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 27.407103264216794, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.017476912587881088, |
|
"eval_runtime": 22.7303, |
|
"eval_samples_per_second": 3.959, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 27.509751591049067, |
|
"grad_norm": 0.2550307512283325, |
|
"learning_rate": 0.00022536515120345608, |
|
"loss": 0.0325, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 27.509751591049067, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.01743141934275627, |
|
"eval_runtime": 22.7564, |
|
"eval_samples_per_second": 3.955, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 27.612399917881337, |
|
"grad_norm": 0.16253231465816498, |
|
"learning_rate": 0.00022433655626414317, |
|
"loss": 0.0327, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 27.612399917881337, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.017301015555858612, |
|
"eval_runtime": 23.1263, |
|
"eval_samples_per_second": 3.892, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 27.71504824471361, |
|
"grad_norm": 0.15195374190807343, |
|
"learning_rate": 0.00022330796132483027, |
|
"loss": 0.0322, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 27.71504824471361, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.017138667404651642, |
|
"eval_runtime": 23.3844, |
|
"eval_samples_per_second": 3.849, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 27.817696571545884, |
|
"grad_norm": 0.2660813331604004, |
|
"learning_rate": 0.0002222793663855174, |
|
"loss": 0.0332, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 27.817696571545884, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.016922015696763992, |
|
"eval_runtime": 22.6918, |
|
"eval_samples_per_second": 3.966, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 27.920344898378158, |
|
"grad_norm": 0.20602907240390778, |
|
"learning_rate": 0.00022125077144620448, |
|
"loss": 0.0328, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 27.920344898378158, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.016544727608561516, |
|
"eval_runtime": 22.5504, |
|
"eval_samples_per_second": 3.991, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 28.022993225210428, |
|
"grad_norm": 0.22045257687568665, |
|
"learning_rate": 0.00022022217650689157, |
|
"loss": 0.0324, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 28.022993225210428, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.016568990424275398, |
|
"eval_runtime": 22.6458, |
|
"eval_samples_per_second": 3.974, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 28.1256415520427, |
|
"grad_norm": 0.18806225061416626, |
|
"learning_rate": 0.0002191935815675787, |
|
"loss": 0.0315, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 28.1256415520427, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.016486881300807, |
|
"eval_runtime": 22.6614, |
|
"eval_samples_per_second": 3.972, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 28.228289878874975, |
|
"grad_norm": 0.1898849457502365, |
|
"learning_rate": 0.0002181649866282658, |
|
"loss": 0.0319, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 28.228289878874975, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.016169264912605286, |
|
"eval_runtime": 22.5647, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 28.33093820570725, |
|
"grad_norm": 0.27770882844924927, |
|
"learning_rate": 0.00021713639168895288, |
|
"loss": 0.0323, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 28.33093820570725, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01689663529396057, |
|
"eval_runtime": 22.5984, |
|
"eval_samples_per_second": 3.983, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 28.43358653253952, |
|
"grad_norm": 0.23435185849666595, |
|
"learning_rate": 0.00021610779674963997, |
|
"loss": 0.0322, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 28.43358653253952, |
|
"eval_cer": 0.009922985781990521, |
|
"eval_loss": 0.016887083649635315, |
|
"eval_runtime": 23.1898, |
|
"eval_samples_per_second": 3.881, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 28.536234859371792, |
|
"grad_norm": 0.19802525639533997, |
|
"learning_rate": 0.0002150792018103271, |
|
"loss": 0.0319, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 28.536234859371792, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.016642894595861435, |
|
"eval_runtime": 22.9038, |
|
"eval_samples_per_second": 3.929, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 28.638883186204065, |
|
"grad_norm": 0.33312317728996277, |
|
"learning_rate": 0.0002140506068710142, |
|
"loss": 0.0324, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 28.638883186204065, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.016943588852882385, |
|
"eval_runtime": 22.6215, |
|
"eval_samples_per_second": 3.979, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 28.74153151303634, |
|
"grad_norm": 0.19455067813396454, |
|
"learning_rate": 0.00021302201193170128, |
|
"loss": 0.0326, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 28.74153151303634, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.016804693266749382, |
|
"eval_runtime": 22.602, |
|
"eval_samples_per_second": 3.982, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 28.84417983986861, |
|
"grad_norm": 0.14672687649726868, |
|
"learning_rate": 0.0002119934169923884, |
|
"loss": 0.0328, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 28.84417983986861, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.016962364315986633, |
|
"eval_runtime": 22.456, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 28.946828166700882, |
|
"grad_norm": 0.3485030233860016, |
|
"learning_rate": 0.00021096482205307552, |
|
"loss": 0.0322, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 28.946828166700882, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.017243940383195877, |
|
"eval_runtime": 22.6149, |
|
"eval_samples_per_second": 3.98, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 29.049476493533156, |
|
"grad_norm": 0.20129866898059845, |
|
"learning_rate": 0.00020993622711376261, |
|
"loss": 0.0317, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 29.049476493533156, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.01694132201373577, |
|
"eval_runtime": 22.5131, |
|
"eval_samples_per_second": 3.998, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 29.15212482036543, |
|
"grad_norm": 0.32001006603240967, |
|
"learning_rate": 0.0002089076321744497, |
|
"loss": 0.0315, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 29.15212482036543, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.01657693088054657, |
|
"eval_runtime": 22.5678, |
|
"eval_samples_per_second": 3.988, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 29.2547731471977, |
|
"grad_norm": 0.21219150722026825, |
|
"learning_rate": 0.00020787903723513683, |
|
"loss": 0.0315, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 29.2547731471977, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.016005953773856163, |
|
"eval_runtime": 22.3068, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 29.357421474029973, |
|
"grad_norm": 0.20887607336044312, |
|
"learning_rate": 0.00020685044229582392, |
|
"loss": 0.0319, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 29.357421474029973, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.016784099861979485, |
|
"eval_runtime": 22.3044, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 29.460069800862247, |
|
"grad_norm": 0.28705254197120667, |
|
"learning_rate": 0.00020582184735651101, |
|
"loss": 0.0314, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 29.460069800862247, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.016876235604286194, |
|
"eval_runtime": 22.2793, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 29.56271812769452, |
|
"grad_norm": 0.323476642370224, |
|
"learning_rate": 0.00020479325241719813, |
|
"loss": 0.032, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 29.56271812769452, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.016845477744936943, |
|
"eval_runtime": 22.1977, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.032171581769437, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 29.66536645452679, |
|
"grad_norm": 0.1829458326101303, |
|
"learning_rate": 0.00020376465747788523, |
|
"loss": 0.0321, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 29.66536645452679, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.015846768394112587, |
|
"eval_runtime": 22.1026, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 29.768014781359064, |
|
"grad_norm": 0.25482961535453796, |
|
"learning_rate": 0.00020273606253857232, |
|
"loss": 0.032, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 29.768014781359064, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.015906278043985367, |
|
"eval_runtime": 22.3867, |
|
"eval_samples_per_second": 4.02, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 29.870663108191337, |
|
"grad_norm": 0.19813385605812073, |
|
"learning_rate": 0.00020170746759925941, |
|
"loss": 0.0319, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 29.870663108191337, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.016582278534770012, |
|
"eval_runtime": 22.6644, |
|
"eval_samples_per_second": 3.971, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 29.97331143502361, |
|
"grad_norm": 0.23543916642665863, |
|
"learning_rate": 0.00020067887265994653, |
|
"loss": 0.0321, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 29.97331143502361, |
|
"eval_cer": 0.009774881516587678, |
|
"eval_loss": 0.01627834513783455, |
|
"eval_runtime": 22.7565, |
|
"eval_samples_per_second": 3.955, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 30.07595976185588, |
|
"grad_norm": 0.20127102732658386, |
|
"learning_rate": 0.00019965027772063363, |
|
"loss": 0.0309, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 30.07595976185588, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.016053717583417892, |
|
"eval_runtime": 22.4512, |
|
"eval_samples_per_second": 4.009, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 30.178608088688154, |
|
"grad_norm": 0.19356395304203033, |
|
"learning_rate": 0.00019862168278132072, |
|
"loss": 0.031, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 30.178608088688154, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.01641259714961052, |
|
"eval_runtime": 22.3968, |
|
"eval_samples_per_second": 4.018, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 30.281256415520428, |
|
"grad_norm": 0.3252677619457245, |
|
"learning_rate": 0.00019759308784200784, |
|
"loss": 0.0314, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 30.281256415520428, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.015991076827049255, |
|
"eval_runtime": 22.4292, |
|
"eval_samples_per_second": 4.013, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 30.3839047423527, |
|
"grad_norm": 0.1807209700345993, |
|
"learning_rate": 0.00019656449290269493, |
|
"loss": 0.0312, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 30.3839047423527, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.015960650518536568, |
|
"eval_runtime": 22.2631, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 30.48655306918497, |
|
"grad_norm": 0.2719903588294983, |
|
"learning_rate": 0.00019553589796338203, |
|
"loss": 0.0312, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 30.48655306918497, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.015721548348665237, |
|
"eval_runtime": 22.1868, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 30.589201396017245, |
|
"grad_norm": 0.32360509037971497, |
|
"learning_rate": 0.00019450730302406912, |
|
"loss": 0.0314, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 30.589201396017245, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01612556166946888, |
|
"eval_runtime": 22.345, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 30.69184972284952, |
|
"grad_norm": 0.28737571835517883, |
|
"learning_rate": 0.00019347870808475624, |
|
"loss": 0.0317, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 30.69184972284952, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.016010984778404236, |
|
"eval_runtime": 22.4982, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 30.794498049681792, |
|
"grad_norm": 0.16976721584796906, |
|
"learning_rate": 0.00019245011314544333, |
|
"loss": 0.0315, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 30.794498049681792, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.01699880138039589, |
|
"eval_runtime": 22.4468, |
|
"eval_samples_per_second": 4.009, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 30.897146376514062, |
|
"grad_norm": 0.2326597273349762, |
|
"learning_rate": 0.00019142151820613043, |
|
"loss": 0.0315, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 30.897146376514062, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.016296546906232834, |
|
"eval_runtime": 22.4046, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03842716711349419, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 30.999794703346335, |
|
"grad_norm": 0.4060353934764862, |
|
"learning_rate": 0.00019039292326681755, |
|
"loss": 0.0318, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 30.999794703346335, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.01701035536825657, |
|
"eval_runtime": 22.4393, |
|
"eval_samples_per_second": 4.011, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 31.10244303017861, |
|
"grad_norm": 0.19074688851833344, |
|
"learning_rate": 0.00018936432832750464, |
|
"loss": 0.0304, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 31.10244303017861, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.016249870881438255, |
|
"eval_runtime": 22.6009, |
|
"eval_samples_per_second": 3.982, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 31.20509135701088, |
|
"grad_norm": 0.1725562959909439, |
|
"learning_rate": 0.00018833573338819173, |
|
"loss": 0.0309, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 31.20509135701088, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.01692904904484749, |
|
"eval_runtime": 22.3452, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 31.307739683843153, |
|
"grad_norm": 0.18745183944702148, |
|
"learning_rate": 0.00018730713844887883, |
|
"loss": 0.0308, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 31.307739683843153, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.01693115197122097, |
|
"eval_runtime": 22.313, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 31.410388010675426, |
|
"grad_norm": 0.2364443838596344, |
|
"learning_rate": 0.00018627854350956595, |
|
"loss": 0.0311, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 31.410388010675426, |
|
"eval_cer": 0.009626777251184835, |
|
"eval_loss": 0.016600091010332108, |
|
"eval_runtime": 22.2551, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 31.5130363375077, |
|
"grad_norm": 0.19486719369888306, |
|
"learning_rate": 0.00018524994857025304, |
|
"loss": 0.0309, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 31.5130363375077, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.01619219221174717, |
|
"eval_runtime": 22.3592, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 31.61568466433997, |
|
"grad_norm": 0.20491057634353638, |
|
"learning_rate": 0.00018422135363094013, |
|
"loss": 0.0308, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 31.61568466433997, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.016473235562443733, |
|
"eval_runtime": 22.4059, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 31.718332991172243, |
|
"grad_norm": 0.26360154151916504, |
|
"learning_rate": 0.00018319275869162725, |
|
"loss": 0.0312, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 31.718332991172243, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.01562103908509016, |
|
"eval_runtime": 22.1604, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 31.820981318004517, |
|
"grad_norm": 0.15921452641487122, |
|
"learning_rate": 0.00018216416375231435, |
|
"loss": 0.0316, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 31.820981318004517, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.016504855826497078, |
|
"eval_runtime": 22.5365, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 31.92362964483679, |
|
"grad_norm": 0.27201271057128906, |
|
"learning_rate": 0.00018113556881300144, |
|
"loss": 0.0314, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 31.92362964483679, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.015464858151972294, |
|
"eval_runtime": 22.5378, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 32.02627797166906, |
|
"grad_norm": 0.3564852178096771, |
|
"learning_rate": 0.00018010697387368853, |
|
"loss": 0.0309, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 32.02627797166906, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.01600516401231289, |
|
"eval_runtime": 22.3728, |
|
"eval_samples_per_second": 4.023, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 32.128926298501334, |
|
"grad_norm": 0.26071467995643616, |
|
"learning_rate": 0.00017907837893437565, |
|
"loss": 0.0307, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 32.128926298501334, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.016347436234354973, |
|
"eval_runtime": 22.155, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 32.23157462533361, |
|
"grad_norm": 0.17180919647216797, |
|
"learning_rate": 0.00017804978399506275, |
|
"loss": 0.0307, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 32.23157462533361, |
|
"eval_cer": 0.009182464454976303, |
|
"eval_loss": 0.01613152027130127, |
|
"eval_runtime": 22.4889, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 32.33422295216588, |
|
"grad_norm": 0.16418492794036865, |
|
"learning_rate": 0.00017702118905574984, |
|
"loss": 0.0301, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 32.33422295216588, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.015433421358466148, |
|
"eval_runtime": 22.2866, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 32.436871278998154, |
|
"grad_norm": 0.2582835853099823, |
|
"learning_rate": 0.00017599259411643696, |
|
"loss": 0.0307, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 32.436871278998154, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.015836581587791443, |
|
"eval_runtime": 22.5371, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 32.53951960583043, |
|
"grad_norm": 0.2574046552181244, |
|
"learning_rate": 0.00017496399917712405, |
|
"loss": 0.0308, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 32.53951960583043, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.015740592032670975, |
|
"eval_runtime": 22.3104, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 32.642167932662694, |
|
"grad_norm": 0.17120341956615448, |
|
"learning_rate": 0.00017393540423781115, |
|
"loss": 0.0304, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 32.642167932662694, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01630273461341858, |
|
"eval_runtime": 22.6719, |
|
"eval_samples_per_second": 3.97, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 32.74481625949497, |
|
"grad_norm": 0.27363935112953186, |
|
"learning_rate": 0.00017290680929849824, |
|
"loss": 0.0309, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 32.74481625949497, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.016513481736183167, |
|
"eval_runtime": 22.5102, |
|
"eval_samples_per_second": 3.998, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 32.84746458632724, |
|
"grad_norm": 0.29559651017189026, |
|
"learning_rate": 0.00017187821435918536, |
|
"loss": 0.0307, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 32.84746458632724, |
|
"eval_cer": 0.009330568720379146, |
|
"eval_loss": 0.01666964590549469, |
|
"eval_runtime": 22.527, |
|
"eval_samples_per_second": 3.995, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 32.950112913159515, |
|
"grad_norm": 0.1644178181886673, |
|
"learning_rate": 0.00017084961941987245, |
|
"loss": 0.0305, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 32.950112913159515, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.01613970287144184, |
|
"eval_runtime": 22.6019, |
|
"eval_samples_per_second": 3.982, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 33.05276123999179, |
|
"grad_norm": 0.24239134788513184, |
|
"learning_rate": 0.00016982102448055955, |
|
"loss": 0.0301, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 33.05276123999179, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.015571474097669125, |
|
"eval_runtime": 22.7263, |
|
"eval_samples_per_second": 3.96, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 33.15540956682406, |
|
"grad_norm": 0.16490726172924042, |
|
"learning_rate": 0.00016879242954124667, |
|
"loss": 0.0296, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 33.15540956682406, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01544241514056921, |
|
"eval_runtime": 22.655, |
|
"eval_samples_per_second": 3.973, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 33.258057893656336, |
|
"grad_norm": 0.19249847531318665, |
|
"learning_rate": 0.00016776383460193376, |
|
"loss": 0.0299, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 33.258057893656336, |
|
"eval_cer": 0.009478672985781991, |
|
"eval_loss": 0.015552397817373276, |
|
"eval_runtime": 22.3811, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03932082216264522, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 33.36070622048861, |
|
"grad_norm": 0.2540992200374603, |
|
"learning_rate": 0.00016673523966262085, |
|
"loss": 0.0298, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 33.36070622048861, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.01545222382992506, |
|
"eval_runtime": 22.2235, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 33.463354547320876, |
|
"grad_norm": 0.17843221127986908, |
|
"learning_rate": 0.00016570664472330795, |
|
"loss": 0.0305, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 33.463354547320876, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.015175366774201393, |
|
"eval_runtime": 22.1848, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.032171581769437, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 33.56600287415315, |
|
"grad_norm": 0.25464072823524475, |
|
"learning_rate": 0.00016467804978399507, |
|
"loss": 0.0306, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 33.56600287415315, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.014621883630752563, |
|
"eval_runtime": 22.1184, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 33.66865120098542, |
|
"grad_norm": 0.16736038029193878, |
|
"learning_rate": 0.00016364945484468216, |
|
"loss": 0.0308, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 33.66865120098542, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.01590174064040184, |
|
"eval_runtime": 22.2326, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 33.771299527817696, |
|
"grad_norm": 0.24227890372276306, |
|
"learning_rate": 0.00016262085990536925, |
|
"loss": 0.0304, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 33.771299527817696, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.015669850632548332, |
|
"eval_runtime": 22.3742, |
|
"eval_samples_per_second": 4.022, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 33.87394785464997, |
|
"grad_norm": 0.18546319007873535, |
|
"learning_rate": 0.00016159226496605637, |
|
"loss": 0.0306, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 33.87394785464997, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.015102799981832504, |
|
"eval_runtime": 22.5007, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 33.97659618148224, |
|
"grad_norm": 0.23194563388824463, |
|
"learning_rate": 0.00016056367002674347, |
|
"loss": 0.0305, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 33.97659618148224, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.015225354582071304, |
|
"eval_runtime": 22.6228, |
|
"eval_samples_per_second": 3.978, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 34.07924450831452, |
|
"grad_norm": 0.18987098336219788, |
|
"learning_rate": 0.00015953507508743056, |
|
"loss": 0.0294, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 34.07924450831452, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.015497728250920773, |
|
"eval_runtime": 22.4328, |
|
"eval_samples_per_second": 4.012, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 34.18189283514679, |
|
"grad_norm": 0.16452568769454956, |
|
"learning_rate": 0.00015850648014811765, |
|
"loss": 0.0299, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 34.18189283514679, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01510694995522499, |
|
"eval_runtime": 22.2639, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 34.28454116197906, |
|
"grad_norm": 0.2669082283973694, |
|
"learning_rate": 0.00015747788520880477, |
|
"loss": 0.0297, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 34.28454116197906, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.015526418574154377, |
|
"eval_runtime": 22.4447, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 34.38718948881133, |
|
"grad_norm": 0.23023459315299988, |
|
"learning_rate": 0.00015644929026949187, |
|
"loss": 0.0298, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 34.38718948881133, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.01542733982205391, |
|
"eval_runtime": 22.4734, |
|
"eval_samples_per_second": 4.005, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 34.489837815643604, |
|
"grad_norm": 0.3047637343406677, |
|
"learning_rate": 0.00015542069533017896, |
|
"loss": 0.0301, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 34.489837815643604, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.0148982098326087, |
|
"eval_runtime": 22.2904, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03753351206434316, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 34.59248614247588, |
|
"grad_norm": 0.2189682275056839, |
|
"learning_rate": 0.00015439210039086608, |
|
"loss": 0.0299, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 34.59248614247588, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.014870991930365562, |
|
"eval_runtime": 22.4248, |
|
"eval_samples_per_second": 4.013, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 34.69513446930815, |
|
"grad_norm": 0.1642763465642929, |
|
"learning_rate": 0.00015336350545155317, |
|
"loss": 0.03, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 34.69513446930815, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.015146846882998943, |
|
"eval_runtime": 22.5811, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 34.797782796140424, |
|
"grad_norm": 0.21596594154834747, |
|
"learning_rate": 0.00015233491051224027, |
|
"loss": 0.03, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 34.797782796140424, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.014487739652395248, |
|
"eval_runtime": 22.3193, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 34.9004311229727, |
|
"grad_norm": 0.15714465081691742, |
|
"learning_rate": 0.00015130631557292736, |
|
"loss": 0.0305, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 34.9004311229727, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.014626013115048409, |
|
"eval_runtime": 22.3971, |
|
"eval_samples_per_second": 4.018, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 35.00307944980497, |
|
"grad_norm": 0.2639608383178711, |
|
"learning_rate": 0.00015027772063361448, |
|
"loss": 0.03, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 35.00307944980497, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.014959813095629215, |
|
"eval_runtime": 22.4235, |
|
"eval_samples_per_second": 4.014, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 35.10572777663724, |
|
"grad_norm": 0.1461016684770584, |
|
"learning_rate": 0.00014924912569430157, |
|
"loss": 0.029, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 35.10572777663724, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.015357970260083675, |
|
"eval_runtime": 22.4778, |
|
"eval_samples_per_second": 4.004, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 35.20837610346951, |
|
"grad_norm": 0.21027566492557526, |
|
"learning_rate": 0.00014822053075498867, |
|
"loss": 0.0294, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 35.20837610346951, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.014925923198461533, |
|
"eval_runtime": 22.563, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 35.311024430301785, |
|
"grad_norm": 0.26173582673072815, |
|
"learning_rate": 0.0001471919358156758, |
|
"loss": 0.0294, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 35.311024430301785, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.014718250371515751, |
|
"eval_runtime": 22.4858, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 35.41367275713406, |
|
"grad_norm": 0.21235166490077972, |
|
"learning_rate": 0.0001461633408763629, |
|
"loss": 0.0295, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 35.41367275713406, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.014633492566645145, |
|
"eval_runtime": 22.4549, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 35.51632108396633, |
|
"grad_norm": 0.2826511263847351, |
|
"learning_rate": 0.00014513474593705, |
|
"loss": 0.0296, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 35.51632108396633, |
|
"eval_cer": 0.00903436018957346, |
|
"eval_loss": 0.015351605601608753, |
|
"eval_runtime": 22.6095, |
|
"eval_samples_per_second": 3.981, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 35.618969410798606, |
|
"grad_norm": 0.15846475958824158, |
|
"learning_rate": 0.00014410615099773712, |
|
"loss": 0.0298, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 35.618969410798606, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.014954261481761932, |
|
"eval_runtime": 22.6016, |
|
"eval_samples_per_second": 3.982, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 35.72161773763088, |
|
"grad_norm": 0.30824708938598633, |
|
"learning_rate": 0.0001430775560584242, |
|
"loss": 0.0298, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 35.72161773763088, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.015362138859927654, |
|
"eval_runtime": 22.4427, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 35.824266064463146, |
|
"grad_norm": 0.29468515515327454, |
|
"learning_rate": 0.0001420489611191113, |
|
"loss": 0.0297, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 35.824266064463146, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.014831768348813057, |
|
"eval_runtime": 22.4167, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 35.92691439129542, |
|
"grad_norm": 0.3372247815132141, |
|
"learning_rate": 0.0001410203661797984, |
|
"loss": 0.0297, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 35.92691439129542, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.015637291595339775, |
|
"eval_runtime": 22.6706, |
|
"eval_samples_per_second": 3.97, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 36.02956271812769, |
|
"grad_norm": 0.2713555693626404, |
|
"learning_rate": 0.00013999177124048552, |
|
"loss": 0.0295, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 36.02956271812769, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01471824198961258, |
|
"eval_runtime": 22.5633, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 36.132211044959966, |
|
"grad_norm": 0.2100389152765274, |
|
"learning_rate": 0.0001389631763011726, |
|
"loss": 0.0288, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 36.132211044959966, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.014739991165697575, |
|
"eval_runtime": 22.6462, |
|
"eval_samples_per_second": 3.974, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 36.23485937179224, |
|
"grad_norm": 0.2058141529560089, |
|
"learning_rate": 0.0001379345813618597, |
|
"loss": 0.0294, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 36.23485937179224, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.014310094527900219, |
|
"eval_runtime": 22.4136, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 36.33750769862451, |
|
"grad_norm": 0.21993263065814972, |
|
"learning_rate": 0.00013690598642254683, |
|
"loss": 0.0288, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 36.33750769862451, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.01488146185874939, |
|
"eval_runtime": 22.7382, |
|
"eval_samples_per_second": 3.958, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 36.44015602545679, |
|
"grad_norm": 0.44507354497909546, |
|
"learning_rate": 0.00013587739148323392, |
|
"loss": 0.0291, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 36.44015602545679, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.014914470724761486, |
|
"eval_runtime": 22.4894, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 36.54280435228906, |
|
"grad_norm": 0.29088860750198364, |
|
"learning_rate": 0.000134848796543921, |
|
"loss": 0.0297, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 36.54280435228906, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.015177594497799873, |
|
"eval_runtime": 22.3664, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 36.64545267912133, |
|
"grad_norm": 0.28655806183815, |
|
"learning_rate": 0.0001338202016046081, |
|
"loss": 0.0291, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 36.64545267912133, |
|
"eval_cer": 0.008886255924170616, |
|
"eval_loss": 0.014854129403829575, |
|
"eval_runtime": 22.3549, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 36.7481010059536, |
|
"grad_norm": 0.16526740789413452, |
|
"learning_rate": 0.00013279160666529523, |
|
"loss": 0.0293, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 36.7481010059536, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.015399602241814137, |
|
"eval_runtime": 22.5162, |
|
"eval_samples_per_second": 3.997, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 36.850749332785874, |
|
"grad_norm": 0.16871041059494019, |
|
"learning_rate": 0.00013176301172598232, |
|
"loss": 0.0296, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 36.850749332785874, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.015135127119719982, |
|
"eval_runtime": 22.5959, |
|
"eval_samples_per_second": 3.983, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 36.95339765961815, |
|
"grad_norm": 0.3259669542312622, |
|
"learning_rate": 0.0001307344167866694, |
|
"loss": 0.0296, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 36.95339765961815, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.015445960685610771, |
|
"eval_runtime": 22.4067, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.032171581769437, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 37.05604598645042, |
|
"grad_norm": 0.2573050260543823, |
|
"learning_rate": 0.00012970582184735653, |
|
"loss": 0.0288, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 37.05604598645042, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.015334886498749256, |
|
"eval_runtime": 22.4559, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 37.158694313282695, |
|
"grad_norm": 0.3997851610183716, |
|
"learning_rate": 0.00012867722690804363, |
|
"loss": 0.0285, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 37.158694313282695, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.015184923075139523, |
|
"eval_runtime": 22.5665, |
|
"eval_samples_per_second": 3.988, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03663985701519214, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 37.26134264011497, |
|
"grad_norm": 0.2915215790271759, |
|
"learning_rate": 0.00012764863196873072, |
|
"loss": 0.0287, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 37.26134264011497, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.01511069294065237, |
|
"eval_runtime": 22.7402, |
|
"eval_samples_per_second": 3.958, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 37.36399096694724, |
|
"grad_norm": 0.1808217316865921, |
|
"learning_rate": 0.0001266200370294178, |
|
"loss": 0.0288, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 37.36399096694724, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.015011030249297619, |
|
"eval_runtime": 22.3683, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 37.46663929377951, |
|
"grad_norm": 0.2159794569015503, |
|
"learning_rate": 0.00012559144209010493, |
|
"loss": 0.0289, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 37.46663929377951, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.014206486754119396, |
|
"eval_runtime": 22.2265, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.035746201966041107, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 37.56928762061178, |
|
"grad_norm": 0.1461094170808792, |
|
"learning_rate": 0.00012456284715079203, |
|
"loss": 0.0291, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 37.56928762061178, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.01417731773108244, |
|
"eval_runtime": 22.6758, |
|
"eval_samples_per_second": 3.969, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 37.671935947444055, |
|
"grad_norm": 0.18427909910678864, |
|
"learning_rate": 0.00012353425221147912, |
|
"loss": 0.0292, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 37.671935947444055, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.014333653263747692, |
|
"eval_runtime": 22.53, |
|
"eval_samples_per_second": 3.995, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 37.77458427427633, |
|
"grad_norm": 0.17020884156227112, |
|
"learning_rate": 0.00012250565727216624, |
|
"loss": 0.0292, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 37.77458427427633, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.014181572012603283, |
|
"eval_runtime": 22.4523, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 37.8772326011086, |
|
"grad_norm": 0.15650001168251038, |
|
"learning_rate": 0.00012147706233285333, |
|
"loss": 0.0289, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 37.8772326011086, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.014552351087331772, |
|
"eval_runtime": 22.33, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 37.979880927940876, |
|
"grad_norm": 0.25610971450805664, |
|
"learning_rate": 0.00012044846739354043, |
|
"loss": 0.0293, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 37.979880927940876, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.01422215811908245, |
|
"eval_runtime": 22.9345, |
|
"eval_samples_per_second": 3.924, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 38.08252925477315, |
|
"grad_norm": 0.25401365756988525, |
|
"learning_rate": 0.00011941987245422753, |
|
"loss": 0.0286, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 38.08252925477315, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.014245335012674332, |
|
"eval_runtime": 22.3755, |
|
"eval_samples_per_second": 4.022, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 38.18517758160542, |
|
"grad_norm": 0.28409621119499207, |
|
"learning_rate": 0.00011839127751491462, |
|
"loss": 0.0281, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 38.18517758160542, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.014165487140417099, |
|
"eval_runtime": 22.2381, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 38.28782590843769, |
|
"grad_norm": 0.20209653675556183, |
|
"learning_rate": 0.00011736268257560173, |
|
"loss": 0.0285, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 38.28782590843769, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.014026058837771416, |
|
"eval_runtime": 22.4373, |
|
"eval_samples_per_second": 4.011, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 38.39047423526996, |
|
"grad_norm": 0.2063857465982437, |
|
"learning_rate": 0.00011633408763628884, |
|
"loss": 0.029, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 38.39047423526996, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.014216107316315174, |
|
"eval_runtime": 22.5613, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 38.493122562102236, |
|
"grad_norm": 0.1818021684885025, |
|
"learning_rate": 0.00011530549269697593, |
|
"loss": 0.0285, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 38.493122562102236, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.014219836331903934, |
|
"eval_runtime": 22.5037, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 38.59577088893451, |
|
"grad_norm": 0.18460506200790405, |
|
"learning_rate": 0.00011427689775766304, |
|
"loss": 0.0285, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 38.59577088893451, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.013656863942742348, |
|
"eval_runtime": 22.652, |
|
"eval_samples_per_second": 3.973, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 38.69841921576678, |
|
"grad_norm": 0.227520152926445, |
|
"learning_rate": 0.00011324830281835013, |
|
"loss": 0.0288, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 38.69841921576678, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.01367582194507122, |
|
"eval_runtime": 22.645, |
|
"eval_samples_per_second": 3.974, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 38.80106754259906, |
|
"grad_norm": 0.1951012909412384, |
|
"learning_rate": 0.00011221970787903724, |
|
"loss": 0.0287, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 38.80106754259906, |
|
"eval_cer": 0.008738151658767773, |
|
"eval_loss": 0.014322535134851933, |
|
"eval_runtime": 22.4631, |
|
"eval_samples_per_second": 4.007, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 38.90371586943133, |
|
"grad_norm": 0.2870897352695465, |
|
"learning_rate": 0.00011119111293972433, |
|
"loss": 0.0288, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 38.90371586943133, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.01387989055365324, |
|
"eval_runtime": 22.1509, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 39.006364196263604, |
|
"grad_norm": 0.1880166083574295, |
|
"learning_rate": 0.00011016251800041144, |
|
"loss": 0.0285, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 39.006364196263604, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.014345232397317886, |
|
"eval_runtime": 22.2937, |
|
"eval_samples_per_second": 4.037, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 39.10901252309587, |
|
"grad_norm": 0.30459660291671753, |
|
"learning_rate": 0.00010913392306109854, |
|
"loss": 0.0279, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 39.10901252309587, |
|
"eval_cer": 0.008590047393364929, |
|
"eval_loss": 0.013818979263305664, |
|
"eval_runtime": 22.1693, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03485254691689008, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 39.211660849928144, |
|
"grad_norm": 0.2618006765842438, |
|
"learning_rate": 0.00010810532812178564, |
|
"loss": 0.0281, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 39.211660849928144, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.013895167037844658, |
|
"eval_runtime": 22.6129, |
|
"eval_samples_per_second": 3.98, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 39.31430917676042, |
|
"grad_norm": 0.1827547401189804, |
|
"learning_rate": 0.00010707673318247274, |
|
"loss": 0.0281, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 39.31430917676042, |
|
"eval_cer": 0.006960900473933649, |
|
"eval_loss": 0.013730869628489017, |
|
"eval_runtime": 22.5617, |
|
"eval_samples_per_second": 3.989, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.028596961572832886, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 39.41695750359269, |
|
"grad_norm": 0.20615758001804352, |
|
"learning_rate": 0.00010604813824315984, |
|
"loss": 0.0283, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 39.41695750359269, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.014077040366828442, |
|
"eval_runtime": 22.4073, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 39.519605830424965, |
|
"grad_norm": 0.28781017661094666, |
|
"learning_rate": 0.00010501954330384694, |
|
"loss": 0.0284, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 39.519605830424965, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013616991229355335, |
|
"eval_runtime": 22.4299, |
|
"eval_samples_per_second": 4.013, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 39.62225415725724, |
|
"grad_norm": 0.1402323842048645, |
|
"learning_rate": 0.00010399094836453404, |
|
"loss": 0.0287, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 39.62225415725724, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.013439136557281017, |
|
"eval_runtime": 22.6508, |
|
"eval_samples_per_second": 3.973, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 39.72490248408951, |
|
"grad_norm": 0.22864773869514465, |
|
"learning_rate": 0.00010296235342522114, |
|
"loss": 0.0286, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 39.72490248408951, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.01389585342258215, |
|
"eval_runtime": 22.2256, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 39.827550810921785, |
|
"grad_norm": 0.20083709061145782, |
|
"learning_rate": 0.00010193375848590825, |
|
"loss": 0.0287, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 39.827550810921785, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.014366346411406994, |
|
"eval_runtime": 22.4657, |
|
"eval_samples_per_second": 4.006, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 39.93019913775405, |
|
"grad_norm": 0.26983821392059326, |
|
"learning_rate": 0.00010090516354659534, |
|
"loss": 0.0285, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 39.93019913775405, |
|
"eval_cer": 0.008145734597156399, |
|
"eval_loss": 0.014381532557308674, |
|
"eval_runtime": 22.541, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 40.032847464586325, |
|
"grad_norm": 0.361680269241333, |
|
"learning_rate": 9.987656860728245e-05, |
|
"loss": 0.0278, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 40.032847464586325, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.01420989166945219, |
|
"eval_runtime": 22.5652, |
|
"eval_samples_per_second": 3.988, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 40.1354957914186, |
|
"grad_norm": 0.17232652008533478, |
|
"learning_rate": 9.884797366796956e-05, |
|
"loss": 0.0276, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 40.1354957914186, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.014762550592422485, |
|
"eval_runtime": 22.4968, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 40.23814411825087, |
|
"grad_norm": 0.35826170444488525, |
|
"learning_rate": 9.781937872865666e-05, |
|
"loss": 0.0278, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 40.23814411825087, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.0146293630823493, |
|
"eval_runtime": 22.6001, |
|
"eval_samples_per_second": 3.982, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 40.340792445083146, |
|
"grad_norm": 0.41236042976379395, |
|
"learning_rate": 9.679078378934376e-05, |
|
"loss": 0.0279, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 40.340792445083146, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.014173084869980812, |
|
"eval_runtime": 22.3588, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 40.44344077191542, |
|
"grad_norm": 0.13260366022586823, |
|
"learning_rate": 9.576218885003086e-05, |
|
"loss": 0.0281, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 40.44344077191542, |
|
"eval_cer": 0.006812796208530806, |
|
"eval_loss": 0.013978157192468643, |
|
"eval_runtime": 22.7638, |
|
"eval_samples_per_second": 3.954, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.028596961572832886, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 40.54608909874769, |
|
"grad_norm": 0.1822642832994461, |
|
"learning_rate": 9.473359391071797e-05, |
|
"loss": 0.0282, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 40.54608909874769, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.014006286859512329, |
|
"eval_runtime": 23.5638, |
|
"eval_samples_per_second": 3.819, |
|
"eval_steps_per_second": 0.042, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 40.64873742557997, |
|
"grad_norm": 0.2515232563018799, |
|
"learning_rate": 9.370499897140506e-05, |
|
"loss": 0.0282, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 40.64873742557997, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013893728144466877, |
|
"eval_runtime": 23.3851, |
|
"eval_samples_per_second": 3.849, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 40.75138575241223, |
|
"grad_norm": 0.24263253808021545, |
|
"learning_rate": 9.267640403209217e-05, |
|
"loss": 0.0282, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 40.75138575241223, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.013497700914740562, |
|
"eval_runtime": 22.831, |
|
"eval_samples_per_second": 3.942, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 40.85403407924451, |
|
"grad_norm": 0.3449910581111908, |
|
"learning_rate": 9.164780909277926e-05, |
|
"loss": 0.0281, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 40.85403407924451, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.014030919410288334, |
|
"eval_runtime": 22.9536, |
|
"eval_samples_per_second": 3.921, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 40.95668240607678, |
|
"grad_norm": 0.34806039929389954, |
|
"learning_rate": 9.061921415346637e-05, |
|
"loss": 0.0283, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 40.95668240607678, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013621107675135136, |
|
"eval_runtime": 22.7573, |
|
"eval_samples_per_second": 3.955, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 41.05933073290905, |
|
"grad_norm": 0.18996162712574005, |
|
"learning_rate": 8.959061921415348e-05, |
|
"loss": 0.0275, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 41.05933073290905, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.013989981263875961, |
|
"eval_runtime": 22.345, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 41.16197905974133, |
|
"grad_norm": 0.22895024716854095, |
|
"learning_rate": 8.856202427484057e-05, |
|
"loss": 0.0278, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 41.16197905974133, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.013726425357162952, |
|
"eval_runtime": 22.1918, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 41.2646273865736, |
|
"grad_norm": 0.20046815276145935, |
|
"learning_rate": 8.753342933552768e-05, |
|
"loss": 0.0276, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 41.2646273865736, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.014126642607152462, |
|
"eval_runtime": 22.5351, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 41.367275713405874, |
|
"grad_norm": 0.2250887155532837, |
|
"learning_rate": 8.650483439621477e-05, |
|
"loss": 0.0277, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 41.367275713405874, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.013941235840320587, |
|
"eval_runtime": 22.4208, |
|
"eval_samples_per_second": 4.014, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 41.46992404023814, |
|
"grad_norm": 0.20660826563835144, |
|
"learning_rate": 8.547623945690188e-05, |
|
"loss": 0.0278, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 41.46992404023814, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013417248614132404, |
|
"eval_runtime": 22.349, |
|
"eval_samples_per_second": 4.027, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 41.572572367070414, |
|
"grad_norm": 0.20120590925216675, |
|
"learning_rate": 8.444764451758897e-05, |
|
"loss": 0.0278, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 41.572572367070414, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.01310759037733078, |
|
"eval_runtime": 22.3447, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 41.67522069390269, |
|
"grad_norm": 0.2711530327796936, |
|
"learning_rate": 8.341904957827608e-05, |
|
"loss": 0.0275, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 41.67522069390269, |
|
"eval_cer": 0.006664691943127962, |
|
"eval_loss": 0.012631074525415897, |
|
"eval_runtime": 22.6139, |
|
"eval_samples_per_second": 3.98, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.028596961572832886, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 41.77786902073496, |
|
"grad_norm": 0.16983696818351746, |
|
"learning_rate": 8.239045463896318e-05, |
|
"loss": 0.028, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 41.77786902073496, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012849998660385609, |
|
"eval_runtime": 22.4766, |
|
"eval_samples_per_second": 4.004, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 41.880517347567235, |
|
"grad_norm": 0.1700281947851181, |
|
"learning_rate": 8.136185969965028e-05, |
|
"loss": 0.028, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 41.880517347567235, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012665662914514542, |
|
"eval_runtime": 22.4018, |
|
"eval_samples_per_second": 4.018, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 41.98316567439951, |
|
"grad_norm": 0.23489758372306824, |
|
"learning_rate": 8.033326476033738e-05, |
|
"loss": 0.0278, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 41.98316567439951, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.013263718225061893, |
|
"eval_runtime": 22.3882, |
|
"eval_samples_per_second": 4.02, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 42.08581400123178, |
|
"grad_norm": 0.2607017159461975, |
|
"learning_rate": 7.930466982102448e-05, |
|
"loss": 0.0276, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 42.08581400123178, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013156604953110218, |
|
"eval_runtime": 22.6758, |
|
"eval_samples_per_second": 3.969, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 42.188462328064055, |
|
"grad_norm": 0.20945683121681213, |
|
"learning_rate": 7.827607488171158e-05, |
|
"loss": 0.0272, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 42.188462328064055, |
|
"eval_cer": 0.006664691943127962, |
|
"eval_loss": 0.012946737930178642, |
|
"eval_runtime": 22.4875, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.028596961572832886, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 42.29111065489632, |
|
"grad_norm": 0.25923091173171997, |
|
"learning_rate": 7.724747994239868e-05, |
|
"loss": 0.0274, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 42.29111065489632, |
|
"eval_cer": 0.006960900473933649, |
|
"eval_loss": 0.012892471626400948, |
|
"eval_runtime": 22.5377, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 42.393758981728595, |
|
"grad_norm": 0.2663125693798065, |
|
"learning_rate": 7.621888500308578e-05, |
|
"loss": 0.0273, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 42.393758981728595, |
|
"eval_cer": 0.006664691943127962, |
|
"eval_loss": 0.012838827446103096, |
|
"eval_runtime": 22.3627, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 42.49640730856087, |
|
"grad_norm": 0.1551075577735901, |
|
"learning_rate": 7.519029006377289e-05, |
|
"loss": 0.0275, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 42.49640730856087, |
|
"eval_cer": 0.006664691943127962, |
|
"eval_loss": 0.01299965288490057, |
|
"eval_runtime": 22.7331, |
|
"eval_samples_per_second": 3.959, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 42.59905563539314, |
|
"grad_norm": 0.16298335790634155, |
|
"learning_rate": 7.416169512445998e-05, |
|
"loss": 0.0275, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 42.59905563539314, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013336721807718277, |
|
"eval_runtime": 22.767, |
|
"eval_samples_per_second": 3.953, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 42.701703962225416, |
|
"grad_norm": 0.20813694596290588, |
|
"learning_rate": 7.313310018514709e-05, |
|
"loss": 0.0278, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 42.701703962225416, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.0134715735912323, |
|
"eval_runtime": 22.6511, |
|
"eval_samples_per_second": 3.973, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 42.80435228905769, |
|
"grad_norm": 0.16702768206596375, |
|
"learning_rate": 7.210450524583418e-05, |
|
"loss": 0.0279, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 42.80435228905769, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.013460041023790836, |
|
"eval_runtime": 22.4175, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 42.90700061588996, |
|
"grad_norm": 0.2635751962661743, |
|
"learning_rate": 7.107591030652129e-05, |
|
"loss": 0.0273, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 42.90700061588996, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.013223396614193916, |
|
"eval_runtime": 22.8666, |
|
"eval_samples_per_second": 3.936, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 43.00964894272224, |
|
"grad_norm": 0.1874234676361084, |
|
"learning_rate": 7.004731536720838e-05, |
|
"loss": 0.0276, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 43.00964894272224, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013210386037826538, |
|
"eval_runtime": 22.5237, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 43.1122972695545, |
|
"grad_norm": 0.16425500810146332, |
|
"learning_rate": 6.901872042789549e-05, |
|
"loss": 0.0269, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 43.1122972695545, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013268969021737576, |
|
"eval_runtime": 22.5906, |
|
"eval_samples_per_second": 3.984, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 43.21494559638678, |
|
"grad_norm": 0.14309245347976685, |
|
"learning_rate": 6.79901254885826e-05, |
|
"loss": 0.0273, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 43.21494559638678, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.012923350557684898, |
|
"eval_runtime": 22.6017, |
|
"eval_samples_per_second": 3.982, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 43.31759392321905, |
|
"grad_norm": 0.3173372447490692, |
|
"learning_rate": 6.69615305492697e-05, |
|
"loss": 0.0271, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 43.31759392321905, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.013023743405938148, |
|
"eval_runtime": 22.4573, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.032171581769437, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 43.420242250051324, |
|
"grad_norm": 0.2641269266605377, |
|
"learning_rate": 6.593293560995681e-05, |
|
"loss": 0.0274, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 43.420242250051324, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.013114223256707191, |
|
"eval_runtime": 22.3329, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.032171581769437, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 43.5228905768836, |
|
"grad_norm": 0.20256099104881287, |
|
"learning_rate": 6.49043406706439e-05, |
|
"loss": 0.0273, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 43.5228905768836, |
|
"eval_cer": 0.008441943127962086, |
|
"eval_loss": 0.013389894738793373, |
|
"eval_runtime": 22.4353, |
|
"eval_samples_per_second": 4.012, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 43.62553890371587, |
|
"grad_norm": 0.2633448541164398, |
|
"learning_rate": 6.387574573133101e-05, |
|
"loss": 0.0273, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 43.62553890371587, |
|
"eval_cer": 0.008293838862559242, |
|
"eval_loss": 0.013175377622246742, |
|
"eval_runtime": 22.0476, |
|
"eval_samples_per_second": 4.082, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 43.728187230548144, |
|
"grad_norm": 0.14357689023017883, |
|
"learning_rate": 6.28471507920181e-05, |
|
"loss": 0.0271, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 43.728187230548144, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013218458741903305, |
|
"eval_runtime": 22.1794, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 43.83083555738042, |
|
"grad_norm": 0.20153464376926422, |
|
"learning_rate": 6.181855585270521e-05, |
|
"loss": 0.0275, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 43.83083555738042, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013227393850684166, |
|
"eval_runtime": 22.262, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 43.933483884212684, |
|
"grad_norm": 0.1699199080467224, |
|
"learning_rate": 6.07899609133923e-05, |
|
"loss": 0.0272, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 43.933483884212684, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013098928146064281, |
|
"eval_runtime": 22.2399, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 44.03613221104496, |
|
"grad_norm": 0.21964909136295319, |
|
"learning_rate": 5.97613659740794e-05, |
|
"loss": 0.0268, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 44.03613221104496, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013152836821973324, |
|
"eval_runtime": 22.2431, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 44.13878053787723, |
|
"grad_norm": 0.2735491693019867, |
|
"learning_rate": 5.873277103476652e-05, |
|
"loss": 0.0265, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 44.13878053787723, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013426227495074272, |
|
"eval_runtime": 22.6558, |
|
"eval_samples_per_second": 3.973, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 44.241428864709505, |
|
"grad_norm": 0.18371999263763428, |
|
"learning_rate": 5.7704176095453617e-05, |
|
"loss": 0.0269, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 44.241428864709505, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013290656730532646, |
|
"eval_runtime": 22.5067, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 44.34407719154178, |
|
"grad_norm": 0.3648182451725006, |
|
"learning_rate": 5.6675581156140717e-05, |
|
"loss": 0.0269, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 44.34407719154178, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.013338846154510975, |
|
"eval_runtime": 22.5224, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 44.44672551837405, |
|
"grad_norm": 0.14803066849708557, |
|
"learning_rate": 5.5646986216827816e-05, |
|
"loss": 0.0272, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 44.44672551837405, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.013501363806426525, |
|
"eval_runtime": 22.4789, |
|
"eval_samples_per_second": 4.004, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 44.549373845206325, |
|
"grad_norm": 0.13751809298992157, |
|
"learning_rate": 5.4618391277514916e-05, |
|
"loss": 0.0275, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 44.549373845206325, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.013389664702117443, |
|
"eval_runtime": 22.5113, |
|
"eval_samples_per_second": 3.998, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 44.6520221720386, |
|
"grad_norm": 0.18634863197803497, |
|
"learning_rate": 5.3589796338202016e-05, |
|
"loss": 0.0271, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 44.6520221720386, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.013616056181490421, |
|
"eval_runtime": 22.6218, |
|
"eval_samples_per_second": 3.978, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 44.754670498870865, |
|
"grad_norm": 0.20694060623645782, |
|
"learning_rate": 5.2561201398889116e-05, |
|
"loss": 0.0272, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 44.754670498870865, |
|
"eval_cer": 0.007997630331753554, |
|
"eval_loss": 0.013776379637420177, |
|
"eval_runtime": 22.7323, |
|
"eval_samples_per_second": 3.959, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 44.85731882570314, |
|
"grad_norm": 0.22977876663208008, |
|
"learning_rate": 5.153260645957622e-05, |
|
"loss": 0.0269, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 44.85731882570314, |
|
"eval_cer": 0.006664691943127962, |
|
"eval_loss": 0.01339475717395544, |
|
"eval_runtime": 22.3891, |
|
"eval_samples_per_second": 4.02, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.028596961572832886, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 44.95996715253541, |
|
"grad_norm": 0.16584943234920502, |
|
"learning_rate": 5.050401152026332e-05, |
|
"loss": 0.0269, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 44.95996715253541, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.013209763914346695, |
|
"eval_runtime": 22.5389, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 45.062615479367686, |
|
"grad_norm": 0.2402484118938446, |
|
"learning_rate": 4.947541658095042e-05, |
|
"loss": 0.0268, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 45.062615479367686, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013325601816177368, |
|
"eval_runtime": 22.5418, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 45.16526380619996, |
|
"grad_norm": 0.18944856524467468, |
|
"learning_rate": 4.844682164163752e-05, |
|
"loss": 0.0265, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 45.16526380619996, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012947525829076767, |
|
"eval_runtime": 22.3674, |
|
"eval_samples_per_second": 4.024, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 45.26791213303223, |
|
"grad_norm": 0.18352921307086945, |
|
"learning_rate": 4.741822670232462e-05, |
|
"loss": 0.0265, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 45.26791213303223, |
|
"eval_cer": 0.006960900473933649, |
|
"eval_loss": 0.012937680818140507, |
|
"eval_runtime": 22.3946, |
|
"eval_samples_per_second": 4.019, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 45.37056045986451, |
|
"grad_norm": 0.2022509127855301, |
|
"learning_rate": 4.638963176301172e-05, |
|
"loss": 0.0266, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 45.37056045986451, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012964108027517796, |
|
"eval_runtime": 22.5768, |
|
"eval_samples_per_second": 3.986, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 45.47320878669678, |
|
"grad_norm": 0.3211962878704071, |
|
"learning_rate": 4.536103682369882e-05, |
|
"loss": 0.0269, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 45.47320878669678, |
|
"eval_cer": 0.006960900473933649, |
|
"eval_loss": 0.01303518284112215, |
|
"eval_runtime": 22.4387, |
|
"eval_samples_per_second": 4.011, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 45.57585711352905, |
|
"grad_norm": 0.1719512939453125, |
|
"learning_rate": 4.433244188438593e-05, |
|
"loss": 0.0268, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 45.57585711352905, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012754334136843681, |
|
"eval_runtime": 22.842, |
|
"eval_samples_per_second": 3.94, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 45.67850544036132, |
|
"grad_norm": 0.1725946068763733, |
|
"learning_rate": 4.330384694507303e-05, |
|
"loss": 0.0265, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 45.67850544036132, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.012887077406048775, |
|
"eval_runtime": 22.7726, |
|
"eval_samples_per_second": 3.952, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 45.781153767193594, |
|
"grad_norm": 0.17726309597492218, |
|
"learning_rate": 4.2275252005760136e-05, |
|
"loss": 0.0269, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 45.781153767193594, |
|
"eval_cer": 0.006812796208530806, |
|
"eval_loss": 0.012967323884367943, |
|
"eval_runtime": 22.8533, |
|
"eval_samples_per_second": 3.938, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 45.88380209402587, |
|
"grad_norm": 0.2569744288921356, |
|
"learning_rate": 4.1246657066447236e-05, |
|
"loss": 0.0268, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 45.88380209402587, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.013006458058953285, |
|
"eval_runtime": 22.479, |
|
"eval_samples_per_second": 4.004, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 45.98645042085814, |
|
"grad_norm": 0.18339526653289795, |
|
"learning_rate": 4.0218062127134336e-05, |
|
"loss": 0.0272, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 45.98645042085814, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012899891473352909, |
|
"eval_runtime": 22.2876, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 46.089098747690414, |
|
"grad_norm": 0.21979086101055145, |
|
"learning_rate": 3.9189467187821436e-05, |
|
"loss": 0.0266, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 46.089098747690414, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.012851923704147339, |
|
"eval_runtime": 22.1005, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.032171581769437, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 46.19174707452269, |
|
"grad_norm": 0.18747855722904205, |
|
"learning_rate": 3.816087224850854e-05, |
|
"loss": 0.0263, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 46.19174707452269, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.012800745666027069, |
|
"eval_runtime": 22.2631, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.032171581769437, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 46.29439540135496, |
|
"grad_norm": 0.20750053226947784, |
|
"learning_rate": 3.713227730919564e-05, |
|
"loss": 0.0263, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 46.29439540135496, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012725234031677246, |
|
"eval_runtime": 22.496, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 46.39704372818723, |
|
"grad_norm": 0.19758926331996918, |
|
"learning_rate": 3.610368236988274e-05, |
|
"loss": 0.027, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 46.39704372818723, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012631443329155445, |
|
"eval_runtime": 22.4978, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 46.4996920550195, |
|
"grad_norm": 0.23830477893352509, |
|
"learning_rate": 3.507508743056984e-05, |
|
"loss": 0.0267, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 46.4996920550195, |
|
"eval_cer": 0.007701421800947867, |
|
"eval_loss": 0.012887844815850258, |
|
"eval_runtime": 22.6407, |
|
"eval_samples_per_second": 3.975, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 46.602340381851775, |
|
"grad_norm": 0.22783514857292175, |
|
"learning_rate": 3.404649249125694e-05, |
|
"loss": 0.026, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 46.602340381851775, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.012926483526825905, |
|
"eval_runtime": 22.7995, |
|
"eval_samples_per_second": 3.947, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 46.70498870868405, |
|
"grad_norm": 0.17195259034633636, |
|
"learning_rate": 3.301789755194404e-05, |
|
"loss": 0.0269, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 46.70498870868405, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.012897716835141182, |
|
"eval_runtime": 22.735, |
|
"eval_samples_per_second": 3.959, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 46.80763703551632, |
|
"grad_norm": 0.21375824511051178, |
|
"learning_rate": 3.198930261263114e-05, |
|
"loss": 0.0267, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 46.80763703551632, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.01305030845105648, |
|
"eval_runtime": 22.6049, |
|
"eval_samples_per_second": 3.981, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 46.910285362348596, |
|
"grad_norm": 0.2291615605354309, |
|
"learning_rate": 3.096070767331825e-05, |
|
"loss": 0.0269, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 46.910285362348596, |
|
"eval_cer": 0.007553317535545024, |
|
"eval_loss": 0.013074580579996109, |
|
"eval_runtime": 22.4586, |
|
"eval_samples_per_second": 4.007, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03395889186773905, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 47.01293368918087, |
|
"grad_norm": 0.18737538158893585, |
|
"learning_rate": 2.993211273400535e-05, |
|
"loss": 0.0266, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 47.01293368918087, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.01272270642220974, |
|
"eval_runtime": 22.7233, |
|
"eval_samples_per_second": 3.961, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 47.115582016013136, |
|
"grad_norm": 0.20762291550636292, |
|
"learning_rate": 2.8903517794692452e-05, |
|
"loss": 0.0263, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 47.115582016013136, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.01276144664734602, |
|
"eval_runtime": 22.6501, |
|
"eval_samples_per_second": 3.973, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 47.21823034284541, |
|
"grad_norm": 0.22940364480018616, |
|
"learning_rate": 2.7874922855379552e-05, |
|
"loss": 0.0263, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 47.21823034284541, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012876234017312527, |
|
"eval_runtime": 22.497, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 47.32087866967768, |
|
"grad_norm": 0.1963769495487213, |
|
"learning_rate": 2.6846327916066652e-05, |
|
"loss": 0.0262, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 47.32087866967768, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012759842909872532, |
|
"eval_runtime": 22.4933, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 47.423526996509956, |
|
"grad_norm": 0.13898225128650665, |
|
"learning_rate": 2.5817732976753755e-05, |
|
"loss": 0.0262, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 47.423526996509956, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012645237147808075, |
|
"eval_runtime": 22.7141, |
|
"eval_samples_per_second": 3.962, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 47.52617532334223, |
|
"grad_norm": 0.295411616563797, |
|
"learning_rate": 2.4789138037440855e-05, |
|
"loss": 0.0264, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 47.52617532334223, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012794570997357368, |
|
"eval_runtime": 22.5479, |
|
"eval_samples_per_second": 3.992, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 47.6288236501745, |
|
"grad_norm": 0.2703556418418884, |
|
"learning_rate": 2.3760543098127955e-05, |
|
"loss": 0.0264, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 47.6288236501745, |
|
"eval_cer": 0.00784952606635071, |
|
"eval_loss": 0.012676162645220757, |
|
"eval_runtime": 22.746, |
|
"eval_samples_per_second": 3.957, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 47.73147197700678, |
|
"grad_norm": 0.20613588392734528, |
|
"learning_rate": 2.2731948158815062e-05, |
|
"loss": 0.0267, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 47.73147197700678, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012650624848902225, |
|
"eval_runtime": 22.6051, |
|
"eval_samples_per_second": 3.981, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 47.83412030383905, |
|
"grad_norm": 0.19190002977848053, |
|
"learning_rate": 2.1703353219502162e-05, |
|
"loss": 0.0261, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 47.83412030383905, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012826332822442055, |
|
"eval_runtime": 22.6709, |
|
"eval_samples_per_second": 3.97, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03306523681858802, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 47.93676863067132, |
|
"grad_norm": 0.1679309457540512, |
|
"learning_rate": 2.0674758280189262e-05, |
|
"loss": 0.0262, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 47.93676863067132, |
|
"eval_cer": 0.006960900473933649, |
|
"eval_loss": 0.012559423223137856, |
|
"eval_runtime": 22.4179, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 48.03941695750359, |
|
"grad_norm": 0.21128496527671814, |
|
"learning_rate": 1.9646163340876362e-05, |
|
"loss": 0.0264, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 48.03941695750359, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.01262712012976408, |
|
"eval_runtime": 22.5251, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 48.142065284335864, |
|
"grad_norm": 0.23150426149368286, |
|
"learning_rate": 1.8617568401563465e-05, |
|
"loss": 0.0263, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 48.142065284335864, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.012799305841326714, |
|
"eval_runtime": 22.402, |
|
"eval_samples_per_second": 4.018, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 48.24471361116814, |
|
"grad_norm": 0.23995615541934967, |
|
"learning_rate": 1.7588973462250565e-05, |
|
"loss": 0.0259, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 48.24471361116814, |
|
"eval_cer": 0.006960900473933649, |
|
"eval_loss": 0.012804466299712658, |
|
"eval_runtime": 22.5738, |
|
"eval_samples_per_second": 3.987, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 48.34736193800041, |
|
"grad_norm": 0.2322542518377304, |
|
"learning_rate": 1.6560378522937665e-05, |
|
"loss": 0.0263, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 48.34736193800041, |
|
"eval_cer": 0.006960900473933649, |
|
"eval_loss": 0.012536253780126572, |
|
"eval_runtime": 22.4337, |
|
"eval_samples_per_second": 4.012, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.029490616621983913, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 48.450010264832684, |
|
"grad_norm": 0.19019187986850739, |
|
"learning_rate": 1.553178358362477e-05, |
|
"loss": 0.0258, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 48.450010264832684, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.012600379064679146, |
|
"eval_runtime": 22.4073, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 48.55265859166496, |
|
"grad_norm": 0.16290856897830963, |
|
"learning_rate": 1.450318864431187e-05, |
|
"loss": 0.0262, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 48.55265859166496, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.012459825724363327, |
|
"eval_runtime": 22.3028, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 48.65530691849723, |
|
"grad_norm": 0.20070821046829224, |
|
"learning_rate": 1.3474593704998972e-05, |
|
"loss": 0.0258, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 48.65530691849723, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.01252325065433979, |
|
"eval_runtime": 22.5396, |
|
"eval_samples_per_second": 3.993, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 48.7579552453295, |
|
"grad_norm": 0.21549555659294128, |
|
"learning_rate": 1.2445998765686073e-05, |
|
"loss": 0.026, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 48.7579552453295, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.0124691566452384, |
|
"eval_runtime": 22.5036, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 48.86060357216177, |
|
"grad_norm": 0.27654412388801575, |
|
"learning_rate": 1.1417403826373175e-05, |
|
"loss": 0.0265, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 48.86060357216177, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012546148151159286, |
|
"eval_runtime": 22.6064, |
|
"eval_samples_per_second": 3.981, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 48.963251898994045, |
|
"grad_norm": 0.1903195083141327, |
|
"learning_rate": 1.0388808887060275e-05, |
|
"loss": 0.0262, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 48.963251898994045, |
|
"eval_cer": 0.00740521327014218, |
|
"eval_loss": 0.012493513524532318, |
|
"eval_runtime": 22.4828, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.032171581769437, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 49.06590022582632, |
|
"grad_norm": 0.2395348995923996, |
|
"learning_rate": 9.360213947747377e-06, |
|
"loss": 0.0261, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 49.06590022582632, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012535948306322098, |
|
"eval_runtime": 22.8324, |
|
"eval_samples_per_second": 3.942, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 49.16854855265859, |
|
"grad_norm": 0.20346806943416595, |
|
"learning_rate": 8.33161900843448e-06, |
|
"loss": 0.0259, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 49.16854855265859, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012490322813391685, |
|
"eval_runtime": 22.7561, |
|
"eval_samples_per_second": 3.955, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 49.271196879490866, |
|
"grad_norm": 0.18711692094802856, |
|
"learning_rate": 7.30302406912158e-06, |
|
"loss": 0.0261, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 49.271196879490866, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.01254476048052311, |
|
"eval_runtime": 22.575, |
|
"eval_samples_per_second": 3.987, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 49.37384520632314, |
|
"grad_norm": 0.19651705026626587, |
|
"learning_rate": 6.274429129808681e-06, |
|
"loss": 0.0257, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 49.37384520632314, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.012482382357120514, |
|
"eval_runtime": 22.6241, |
|
"eval_samples_per_second": 3.978, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 49.47649353315541, |
|
"grad_norm": 0.1909824162721634, |
|
"learning_rate": 5.245834190495783e-06, |
|
"loss": 0.0261, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 49.47649353315541, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.012513377703726292, |
|
"eval_runtime": 22.706, |
|
"eval_samples_per_second": 3.964, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 49.57914185998768, |
|
"grad_norm": 0.18398302793502808, |
|
"learning_rate": 4.217239251182884e-06, |
|
"loss": 0.0262, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 49.57914185998768, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012494869530200958, |
|
"eval_runtime": 22.545, |
|
"eval_samples_per_second": 3.992, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 49.68179018681995, |
|
"grad_norm": 0.2315966933965683, |
|
"learning_rate": 3.1886443118699856e-06, |
|
"loss": 0.0255, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 49.68179018681995, |
|
"eval_cer": 0.007257109004739337, |
|
"eval_loss": 0.012507443316280842, |
|
"eval_runtime": 22.6822, |
|
"eval_samples_per_second": 3.968, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03127792672028597, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 49.784438513652226, |
|
"grad_norm": 0.16276974976062775, |
|
"learning_rate": 2.1600493725570872e-06, |
|
"loss": 0.0258, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 49.784438513652226, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.012460754252970219, |
|
"eval_runtime": 22.3925, |
|
"eval_samples_per_second": 4.019, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 49.8870868404845, |
|
"grad_norm": 0.16273947060108185, |
|
"learning_rate": 1.1314544332441885e-06, |
|
"loss": 0.0261, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 49.8870868404845, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.012488246895372868, |
|
"eval_runtime": 22.5881, |
|
"eval_samples_per_second": 3.984, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 49.98973516731677, |
|
"grad_norm": 0.2636996805667877, |
|
"learning_rate": 1.0285949393128985e-07, |
|
"loss": 0.0263, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 49.98973516731677, |
|
"eval_cer": 0.0071090047393364926, |
|
"eval_loss": 0.01250074990093708, |
|
"eval_runtime": 22.7073, |
|
"eval_samples_per_second": 3.963, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.03038427167113494, |
|
"step": 487000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 487100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 4000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.703420432200581e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |