|
{ |
|
"best_metric": 0.02429259568452835, |
|
"best_model_checkpoint": "./phase3-t5/checkpoint-484000", |
|
"epoch": 50.0, |
|
"eval_steps": 1000, |
|
"global_step": 487100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10264832683227264, |
|
"grad_norm": 0.2675953805446625, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1517, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.10264832683227264, |
|
"eval_cer": 0.3379739336492891, |
|
"eval_loss": 0.09135649353265762, |
|
"eval_runtime": 61.2416, |
|
"eval_samples_per_second": 1.47, |
|
"eval_steps_per_second": 0.016, |
|
"eval_wer": 0.42716711349419123, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.20529665366454528, |
|
"grad_norm": 0.21362937986850739, |
|
"learning_rate": 0.0004989714050606871, |
|
"loss": 0.1197, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.20529665366454528, |
|
"eval_cer": 0.15506516587677724, |
|
"eval_loss": 0.08493143320083618, |
|
"eval_runtime": 60.3144, |
|
"eval_samples_per_second": 1.492, |
|
"eval_steps_per_second": 0.017, |
|
"eval_wer": 0.2645218945487042, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3079449804968179, |
|
"grad_norm": 0.381354957818985, |
|
"learning_rate": 0.0004979428101213742, |
|
"loss": 0.1119, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3079449804968179, |
|
"eval_cer": 0.13122037914691942, |
|
"eval_loss": 0.07921701669692993, |
|
"eval_runtime": 41.6322, |
|
"eval_samples_per_second": 2.162, |
|
"eval_steps_per_second": 0.024, |
|
"eval_wer": 0.23145665773011617, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.41059330732909055, |
|
"grad_norm": 0.21338549256324768, |
|
"learning_rate": 0.0004969142151820613, |
|
"loss": 0.1083, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41059330732909055, |
|
"eval_cer": 0.05139218009478673, |
|
"eval_loss": 0.07416867464780807, |
|
"eval_runtime": 24.7028, |
|
"eval_samples_per_second": 3.643, |
|
"eval_steps_per_second": 0.04, |
|
"eval_wer": 0.14655942806076855, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5132416341613631, |
|
"grad_norm": 0.16844697296619415, |
|
"learning_rate": 0.0004958856202427484, |
|
"loss": 0.1054, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5132416341613631, |
|
"eval_cer": 0.06916469194312796, |
|
"eval_loss": 0.0707884430885315, |
|
"eval_runtime": 32.9432, |
|
"eval_samples_per_second": 2.732, |
|
"eval_steps_per_second": 0.03, |
|
"eval_wer": 0.1572832886505809, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6158899609936358, |
|
"grad_norm": 0.2411990612745285, |
|
"learning_rate": 0.0004948570253034355, |
|
"loss": 0.1033, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6158899609936358, |
|
"eval_cer": 0.1552132701421801, |
|
"eval_loss": 0.07135774940252304, |
|
"eval_runtime": 46.0141, |
|
"eval_samples_per_second": 1.956, |
|
"eval_steps_per_second": 0.022, |
|
"eval_wer": 0.24128686327077747, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7185382878259085, |
|
"grad_norm": 0.24947325885295868, |
|
"learning_rate": 0.0004938284303641226, |
|
"loss": 0.1017, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7185382878259085, |
|
"eval_cer": 0.059537914691943125, |
|
"eval_loss": 0.06659836322069168, |
|
"eval_runtime": 45.8436, |
|
"eval_samples_per_second": 1.963, |
|
"eval_steps_per_second": 0.022, |
|
"eval_wer": 0.14298480786416443, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8211866146581811, |
|
"grad_norm": 0.14766907691955566, |
|
"learning_rate": 0.0004927998354248098, |
|
"loss": 0.1006, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8211866146581811, |
|
"eval_cer": 0.0727191943127962, |
|
"eval_loss": 0.06510724872350693, |
|
"eval_runtime": 51.2808, |
|
"eval_samples_per_second": 1.755, |
|
"eval_steps_per_second": 0.02, |
|
"eval_wer": 0.15638963360142985, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9238349414904538, |
|
"grad_norm": 0.25097745656967163, |
|
"learning_rate": 0.0004917712404854969, |
|
"loss": 0.0982, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9238349414904538, |
|
"eval_cer": 0.04576421800947867, |
|
"eval_loss": 0.06489837914705276, |
|
"eval_runtime": 45.8464, |
|
"eval_samples_per_second": 1.963, |
|
"eval_steps_per_second": 0.022, |
|
"eval_wer": 0.13047363717605004, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.0264832683227263, |
|
"grad_norm": 0.31185394525527954, |
|
"learning_rate": 0.000490742645546184, |
|
"loss": 0.0976, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0264832683227263, |
|
"eval_cer": 0.050355450236966824, |
|
"eval_loss": 0.06680955737829208, |
|
"eval_runtime": 46.015, |
|
"eval_samples_per_second": 1.956, |
|
"eval_steps_per_second": 0.022, |
|
"eval_wer": 0.13494191242180517, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.129131595154999, |
|
"grad_norm": 0.2225043922662735, |
|
"learning_rate": 0.000489714050606871, |
|
"loss": 0.0944, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.129131595154999, |
|
"eval_cer": 0.03806279620853081, |
|
"eval_loss": 0.06565282493829727, |
|
"eval_runtime": 39.9826, |
|
"eval_samples_per_second": 2.251, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.1224307417336908, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.2317799219872716, |
|
"grad_norm": 0.2643093168735504, |
|
"learning_rate": 0.0004886854556675581, |
|
"loss": 0.095, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.2317799219872716, |
|
"eval_cer": 0.042950236966824644, |
|
"eval_loss": 0.06407604366540909, |
|
"eval_runtime": 38.3229, |
|
"eval_samples_per_second": 2.348, |
|
"eval_steps_per_second": 0.026, |
|
"eval_wer": 0.12779267202859695, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.3344282488195442, |
|
"grad_norm": 0.1704595983028412, |
|
"learning_rate": 0.00048765686072824524, |
|
"loss": 0.0934, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.3344282488195442, |
|
"eval_cer": 0.030953791469194313, |
|
"eval_loss": 0.06052744388580322, |
|
"eval_runtime": 43.0244, |
|
"eval_samples_per_second": 2.092, |
|
"eval_steps_per_second": 0.023, |
|
"eval_wer": 0.11081322609472744, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.437076575651817, |
|
"grad_norm": 0.28452351689338684, |
|
"learning_rate": 0.00048662826578893233, |
|
"loss": 0.0922, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.437076575651817, |
|
"eval_cer": 0.030065165876777253, |
|
"eval_loss": 0.06290669739246368, |
|
"eval_runtime": 38.566, |
|
"eval_samples_per_second": 2.334, |
|
"eval_steps_per_second": 0.026, |
|
"eval_wer": 0.1063449508489723, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.5397249024840896, |
|
"grad_norm": 0.18739064037799835, |
|
"learning_rate": 0.0004855996708496194, |
|
"loss": 0.0914, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.5397249024840896, |
|
"eval_cer": 0.09389810426540285, |
|
"eval_loss": 0.060126081109046936, |
|
"eval_runtime": 60.9362, |
|
"eval_samples_per_second": 1.477, |
|
"eval_steps_per_second": 0.016, |
|
"eval_wer": 0.1742627345844504, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.642373229316362, |
|
"grad_norm": 0.14926160871982574, |
|
"learning_rate": 0.0004845710759103065, |
|
"loss": 0.0923, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.642373229316362, |
|
"eval_cer": 0.04428317535545024, |
|
"eval_loss": 0.05910157784819603, |
|
"eval_runtime": 35.0608, |
|
"eval_samples_per_second": 2.567, |
|
"eval_steps_per_second": 0.029, |
|
"eval_wer": 0.11974977658623771, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.7450215561486346, |
|
"grad_norm": 0.15932171046733856, |
|
"learning_rate": 0.0004835424809709936, |
|
"loss": 0.0923, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.7450215561486346, |
|
"eval_cer": 0.08975118483412323, |
|
"eval_loss": 0.05897140130400658, |
|
"eval_runtime": 37.0041, |
|
"eval_samples_per_second": 2.432, |
|
"eval_steps_per_second": 0.027, |
|
"eval_wer": 0.16979445933869527, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.8476698829809073, |
|
"grad_norm": 0.23555859923362732, |
|
"learning_rate": 0.0004825138860316807, |
|
"loss": 0.0901, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.8476698829809073, |
|
"eval_cer": 0.033619668246445494, |
|
"eval_loss": 0.057106491178274155, |
|
"eval_runtime": 20.6425, |
|
"eval_samples_per_second": 4.36, |
|
"eval_steps_per_second": 0.048, |
|
"eval_wer": 0.10723860589812333, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.95031820981318, |
|
"grad_norm": 0.16271623969078064, |
|
"learning_rate": 0.00048148529109236785, |
|
"loss": 0.0896, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.95031820981318, |
|
"eval_cer": 0.039543838862559244, |
|
"eval_loss": 0.05746171995997429, |
|
"eval_runtime": 21.3164, |
|
"eval_samples_per_second": 4.222, |
|
"eval_steps_per_second": 0.047, |
|
"eval_wer": 0.11349419124218052, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.0529665366454526, |
|
"grad_norm": 0.225450336933136, |
|
"learning_rate": 0.00048045669615305494, |
|
"loss": 0.0879, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.0529665366454526, |
|
"eval_cer": 0.041321090047393365, |
|
"eval_loss": 0.05702373385429382, |
|
"eval_runtime": 22.0309, |
|
"eval_samples_per_second": 4.085, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.11170688114387846, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.1556148634777252, |
|
"grad_norm": 0.2128904014825821, |
|
"learning_rate": 0.00047942810121374204, |
|
"loss": 0.0875, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.1556148634777252, |
|
"eval_cer": 0.03154620853080569, |
|
"eval_loss": 0.05841705948114395, |
|
"eval_runtime": 22.0788, |
|
"eval_samples_per_second": 4.076, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.10187667560321716, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.258263190309998, |
|
"grad_norm": 0.24170714616775513, |
|
"learning_rate": 0.00047839950627442913, |
|
"loss": 0.0869, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.258263190309998, |
|
"eval_cer": 0.034212085308056875, |
|
"eval_loss": 0.057785358279943466, |
|
"eval_runtime": 22.1563, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.10902591599642537, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.3609115171422705, |
|
"grad_norm": 0.261454313993454, |
|
"learning_rate": 0.0004773709113351162, |
|
"loss": 0.0868, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.3609115171422705, |
|
"eval_cer": 0.03450829383886256, |
|
"eval_loss": 0.05649031326174736, |
|
"eval_runtime": 22.1442, |
|
"eval_samples_per_second": 4.064, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.10723860589812333, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.463559843974543, |
|
"grad_norm": 0.24116463959217072, |
|
"learning_rate": 0.0004763423163958033, |
|
"loss": 0.0861, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.463559843974543, |
|
"eval_cer": 0.03169431279620853, |
|
"eval_loss": 0.05647359788417816, |
|
"eval_runtime": 23.1195, |
|
"eval_samples_per_second": 3.893, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.10187667560321716, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.566208170806816, |
|
"grad_norm": 0.19247783720493317, |
|
"learning_rate": 0.0004753137214564904, |
|
"loss": 0.0861, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.566208170806816, |
|
"eval_cer": 0.027103080568720378, |
|
"eval_loss": 0.05473396182060242, |
|
"eval_runtime": 21.8207, |
|
"eval_samples_per_second": 4.125, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.10008936550491511, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.6688564976390885, |
|
"grad_norm": 0.20277945697307587, |
|
"learning_rate": 0.00047428512651717756, |
|
"loss": 0.0859, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.6688564976390885, |
|
"eval_cer": 0.028880331753554502, |
|
"eval_loss": 0.05520312860608101, |
|
"eval_runtime": 22.4109, |
|
"eval_samples_per_second": 4.016, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.10187667560321716, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.771504824471361, |
|
"grad_norm": 0.17546099424362183, |
|
"learning_rate": 0.00047325653157786465, |
|
"loss": 0.0853, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.771504824471361, |
|
"eval_cer": 0.027251184834123223, |
|
"eval_loss": 0.05452750623226166, |
|
"eval_runtime": 22.3468, |
|
"eval_samples_per_second": 4.027, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09562109025915996, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.874153151303634, |
|
"grad_norm": 0.23071998357772827, |
|
"learning_rate": 0.00047222793663855174, |
|
"loss": 0.0857, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.874153151303634, |
|
"eval_cer": 0.027399289099526065, |
|
"eval_loss": 0.05141612887382507, |
|
"eval_runtime": 22.353, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09651474530831099, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.9768014781359065, |
|
"grad_norm": 0.2768128216266632, |
|
"learning_rate": 0.00047119934169923884, |
|
"loss": 0.0844, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.9768014781359065, |
|
"eval_cer": 0.030805687203791468, |
|
"eval_loss": 0.053941383957862854, |
|
"eval_runtime": 22.2529, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.1063449508489723, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.079449804968179, |
|
"grad_norm": 0.21525971591472626, |
|
"learning_rate": 0.00047017074675992593, |
|
"loss": 0.0828, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.079449804968179, |
|
"eval_cer": 0.027843601895734597, |
|
"eval_loss": 0.05354895442724228, |
|
"eval_runtime": 22.123, |
|
"eval_samples_per_second": 4.068, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0938337801608579, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.1820981318004518, |
|
"grad_norm": 0.36673811078071594, |
|
"learning_rate": 0.000469142151820613, |
|
"loss": 0.0825, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.1820981318004518, |
|
"eval_cer": 0.02843601895734597, |
|
"eval_loss": 0.05313113331794739, |
|
"eval_runtime": 22.1652, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.10008936550491511, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.2847464586327244, |
|
"grad_norm": 0.20910222828388214, |
|
"learning_rate": 0.0004681135568813001, |
|
"loss": 0.0815, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.2847464586327244, |
|
"eval_cer": 0.04191350710900474, |
|
"eval_loss": 0.05280559882521629, |
|
"eval_runtime": 25.447, |
|
"eval_samples_per_second": 3.537, |
|
"eval_steps_per_second": 0.039, |
|
"eval_wer": 0.11349419124218052, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.387394785464997, |
|
"grad_norm": 0.18754634261131287, |
|
"learning_rate": 0.00046708496194198726, |
|
"loss": 0.0815, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.387394785464997, |
|
"eval_cer": 0.029472748815165876, |
|
"eval_loss": 0.05334876477718353, |
|
"eval_runtime": 21.9504, |
|
"eval_samples_per_second": 4.1, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.10545129579982127, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.4900431122972697, |
|
"grad_norm": 0.2228628695011139, |
|
"learning_rate": 0.00046605636700267436, |
|
"loss": 0.0821, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.4900431122972697, |
|
"eval_cer": 0.029917061611374408, |
|
"eval_loss": 0.05586351081728935, |
|
"eval_runtime": 21.6771, |
|
"eval_samples_per_second": 4.152, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.10545129579982127, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.592691439129542, |
|
"grad_norm": 0.13316944241523743, |
|
"learning_rate": 0.00046502777206336145, |
|
"loss": 0.0813, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.592691439129542, |
|
"eval_cer": 0.029324644549763034, |
|
"eval_loss": 0.05374256148934364, |
|
"eval_runtime": 21.9747, |
|
"eval_samples_per_second": 4.096, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.10277033065236818, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.6953397659618146, |
|
"grad_norm": 0.16168580949306488, |
|
"learning_rate": 0.00046399917712404854, |
|
"loss": 0.0812, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.6953397659618146, |
|
"eval_cer": 0.03450829383886256, |
|
"eval_loss": 0.05287105217576027, |
|
"eval_runtime": 24.1708, |
|
"eval_samples_per_second": 3.724, |
|
"eval_steps_per_second": 0.041, |
|
"eval_wer": 0.10008936550491511, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.7979880927940872, |
|
"grad_norm": 0.20095530152320862, |
|
"learning_rate": 0.00046297058218473564, |
|
"loss": 0.0814, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.7979880927940872, |
|
"eval_cer": 0.035545023696682464, |
|
"eval_loss": 0.05389825999736786, |
|
"eval_runtime": 21.7718, |
|
"eval_samples_per_second": 4.134, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.10723860589812333, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.90063641962636, |
|
"grad_norm": 0.19599634408950806, |
|
"learning_rate": 0.00046194198724542273, |
|
"loss": 0.0815, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.90063641962636, |
|
"eval_cer": 0.026954976303317536, |
|
"eval_loss": 0.05257488787174225, |
|
"eval_runtime": 22.0572, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09651474530831099, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.0032847464586325, |
|
"grad_norm": 0.15417757630348206, |
|
"learning_rate": 0.0004609133923061098, |
|
"loss": 0.0806, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.0032847464586325, |
|
"eval_cer": 0.027695497630331755, |
|
"eval_loss": 0.05188766494393349, |
|
"eval_runtime": 22.1269, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09830205540661305, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.105933073290905, |
|
"grad_norm": 0.26163730025291443, |
|
"learning_rate": 0.00045988479736679697, |
|
"loss": 0.0794, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.105933073290905, |
|
"eval_cer": 0.025622037914691944, |
|
"eval_loss": 0.048877034336328506, |
|
"eval_runtime": 22.2755, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08847184986595175, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.208581400123178, |
|
"grad_norm": 0.20826220512390137, |
|
"learning_rate": 0.00045885620242748406, |
|
"loss": 0.0785, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.208581400123178, |
|
"eval_cer": 0.025622037914691944, |
|
"eval_loss": 0.05260869115591049, |
|
"eval_runtime": 22.0992, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09204647006255585, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.3112297269554505, |
|
"grad_norm": 0.3389241695404053, |
|
"learning_rate": 0.00045782760748817116, |
|
"loss": 0.0782, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.3112297269554505, |
|
"eval_cer": 0.026954976303317536, |
|
"eval_loss": 0.04903939738869667, |
|
"eval_runtime": 22.5525, |
|
"eval_samples_per_second": 3.991, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.09204647006255585, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.413878053787723, |
|
"grad_norm": 0.29210948944091797, |
|
"learning_rate": 0.00045679901254885825, |
|
"loss": 0.0779, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.413878053787723, |
|
"eval_cer": 0.02754739336492891, |
|
"eval_loss": 0.04861212149262428, |
|
"eval_runtime": 22.0013, |
|
"eval_samples_per_second": 4.091, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09204647006255585, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.516526380619996, |
|
"grad_norm": 0.19757212698459625, |
|
"learning_rate": 0.00045577041760954534, |
|
"loss": 0.0786, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.516526380619996, |
|
"eval_cer": 0.023992890995260665, |
|
"eval_loss": 0.04914968088269234, |
|
"eval_runtime": 22.6286, |
|
"eval_samples_per_second": 3.977, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.08936550491510277, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.619174707452268, |
|
"grad_norm": 0.23518621921539307, |
|
"learning_rate": 0.00045474182267023244, |
|
"loss": 0.0778, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.619174707452268, |
|
"eval_cer": 0.025770142180094786, |
|
"eval_loss": 0.04797298088669777, |
|
"eval_runtime": 22.0387, |
|
"eval_samples_per_second": 4.084, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08847184986595175, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.721823034284541, |
|
"grad_norm": 0.18045727908611298, |
|
"learning_rate": 0.00045371322773091953, |
|
"loss": 0.0782, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.721823034284541, |
|
"eval_cer": 0.023252369668246446, |
|
"eval_loss": 0.049161382019519806, |
|
"eval_runtime": 22.0981, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08579088471849866, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.824471361116814, |
|
"grad_norm": 0.2216973602771759, |
|
"learning_rate": 0.0004526846327916067, |
|
"loss": 0.0778, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.824471361116814, |
|
"eval_cer": 0.027103080568720378, |
|
"eval_loss": 0.04854327812790871, |
|
"eval_runtime": 22.1254, |
|
"eval_samples_per_second": 4.068, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09204647006255585, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.927119687949086, |
|
"grad_norm": 0.21196693181991577, |
|
"learning_rate": 0.00045165603785229377, |
|
"loss": 0.078, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.927119687949086, |
|
"eval_cer": 0.0254739336492891, |
|
"eval_loss": 0.04801648482680321, |
|
"eval_runtime": 22.3465, |
|
"eval_samples_per_second": 4.027, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08847184986595175, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.029768014781359, |
|
"grad_norm": 0.22312557697296143, |
|
"learning_rate": 0.00045062744291298086, |
|
"loss": 0.0771, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.029768014781359, |
|
"eval_cer": 0.027843601895734597, |
|
"eval_loss": 0.04988682270050049, |
|
"eval_runtime": 22.2617, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0938337801608579, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.132416341613632, |
|
"grad_norm": 0.22252117097377777, |
|
"learning_rate": 0.00044959884797366796, |
|
"loss": 0.0753, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.132416341613632, |
|
"eval_cer": 0.024140995260663507, |
|
"eval_loss": 0.04682554677128792, |
|
"eval_runtime": 22.1777, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08936550491510277, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.235064668445904, |
|
"grad_norm": 0.15847323834896088, |
|
"learning_rate": 0.00044857025303435505, |
|
"loss": 0.0756, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.235064668445904, |
|
"eval_cer": 0.02502962085308057, |
|
"eval_loss": 0.047456566244363785, |
|
"eval_runtime": 22.3306, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08847184986595175, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.337712995278177, |
|
"grad_norm": 0.2008858174085617, |
|
"learning_rate": 0.00044754165809504214, |
|
"loss": 0.0763, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.337712995278177, |
|
"eval_cer": 0.021178909952606635, |
|
"eval_loss": 0.0479045994579792, |
|
"eval_runtime": 22.2995, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0777479892761394, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.44036132211045, |
|
"grad_norm": 0.31161248683929443, |
|
"learning_rate": 0.00044651306315572923, |
|
"loss": 0.0761, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.44036132211045, |
|
"eval_cer": 0.026066350710900472, |
|
"eval_loss": 0.047916192561388016, |
|
"eval_runtime": 22.0152, |
|
"eval_samples_per_second": 4.088, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08847184986595175, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.543009648942722, |
|
"grad_norm": 0.19102248549461365, |
|
"learning_rate": 0.0004454844682164164, |
|
"loss": 0.0756, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 5.543009648942722, |
|
"eval_cer": 0.023252369668246446, |
|
"eval_loss": 0.04646703228354454, |
|
"eval_runtime": 22.0815, |
|
"eval_samples_per_second": 4.076, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08668453976764968, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 5.645657975774995, |
|
"grad_norm": 0.3801836669445038, |
|
"learning_rate": 0.0004444558732771035, |
|
"loss": 0.0758, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.645657975774995, |
|
"eval_cer": 0.025770142180094786, |
|
"eval_loss": 0.04690609872341156, |
|
"eval_runtime": 22.1797, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09294012511170688, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.748306302607268, |
|
"grad_norm": 0.24007199704647064, |
|
"learning_rate": 0.00044342727833779057, |
|
"loss": 0.0751, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.748306302607268, |
|
"eval_cer": 0.026066350710900472, |
|
"eval_loss": 0.04649204760789871, |
|
"eval_runtime": 22.2606, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0902591599642538, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.85095462943954, |
|
"grad_norm": 0.177778959274292, |
|
"learning_rate": 0.00044239868339847766, |
|
"loss": 0.0751, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.85095462943954, |
|
"eval_cer": 0.02665876777251185, |
|
"eval_loss": 0.04591357707977295, |
|
"eval_runtime": 22.0753, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09294012511170688, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.953602956271813, |
|
"grad_norm": 0.14689143002033234, |
|
"learning_rate": 0.00044137008845916475, |
|
"loss": 0.0754, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.953602956271813, |
|
"eval_cer": 0.025622037914691944, |
|
"eval_loss": 0.04754678159952164, |
|
"eval_runtime": 22.1351, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.09294012511170688, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 6.056251283104086, |
|
"grad_norm": 0.1848069429397583, |
|
"learning_rate": 0.00044034149351985185, |
|
"loss": 0.0738, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 6.056251283104086, |
|
"eval_cer": 0.027103080568720378, |
|
"eval_loss": 0.04577142372727394, |
|
"eval_runtime": 22.1342, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08936550491510277, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 6.158899609936358, |
|
"grad_norm": 0.22594769299030304, |
|
"learning_rate": 0.00043931289858053894, |
|
"loss": 0.0735, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.158899609936358, |
|
"eval_cer": 0.025177725118483412, |
|
"eval_loss": 0.04629523307085037, |
|
"eval_runtime": 22.2033, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08847184986595175, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.261547936768631, |
|
"grad_norm": 0.20947369933128357, |
|
"learning_rate": 0.0004382843036412261, |
|
"loss": 0.0734, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 6.261547936768631, |
|
"eval_cer": 0.026510663507109004, |
|
"eval_loss": 0.04853444546461105, |
|
"eval_runtime": 21.9491, |
|
"eval_samples_per_second": 4.1, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.09204647006255585, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 6.3641962636009035, |
|
"grad_norm": 0.2018922120332718, |
|
"learning_rate": 0.0004372557087019132, |
|
"loss": 0.0734, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 6.3641962636009035, |
|
"eval_cer": 0.022808056872037914, |
|
"eval_loss": 0.04681561887264252, |
|
"eval_runtime": 22.0488, |
|
"eval_samples_per_second": 4.082, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07864164432529044, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 6.466844590433176, |
|
"grad_norm": 0.2090204358100891, |
|
"learning_rate": 0.0004362271137626003, |
|
"loss": 0.0737, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 6.466844590433176, |
|
"eval_cer": 0.03865521327014218, |
|
"eval_loss": 0.046171579509973526, |
|
"eval_runtime": 24.3572, |
|
"eval_samples_per_second": 3.695, |
|
"eval_steps_per_second": 0.041, |
|
"eval_wer": 0.09651474530831099, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 6.569492917265449, |
|
"grad_norm": 0.21769754588603973, |
|
"learning_rate": 0.0004351985188232874, |
|
"loss": 0.0726, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 6.569492917265449, |
|
"eval_cer": 0.0254739336492891, |
|
"eval_loss": 0.047106679528951645, |
|
"eval_runtime": 21.6323, |
|
"eval_samples_per_second": 4.16, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.08936550491510277, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 6.6721412440977215, |
|
"grad_norm": 0.17357899248600006, |
|
"learning_rate": 0.0004341699238839745, |
|
"loss": 0.0734, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 6.6721412440977215, |
|
"eval_cer": 0.026214454976303318, |
|
"eval_loss": 0.04763852432370186, |
|
"eval_runtime": 21.7494, |
|
"eval_samples_per_second": 4.138, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.08757819481680071, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 6.774789570929994, |
|
"grad_norm": 0.23694564402103424, |
|
"learning_rate": 0.0004331413289446616, |
|
"loss": 0.0737, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.774789570929994, |
|
"eval_cer": 0.02384478672985782, |
|
"eval_loss": 0.04740356281399727, |
|
"eval_runtime": 21.8747, |
|
"eval_samples_per_second": 4.114, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.0840035746201966, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.877437897762267, |
|
"grad_norm": 0.24957220256328583, |
|
"learning_rate": 0.00043211273400534876, |
|
"loss": 0.0725, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.877437897762267, |
|
"eval_cer": 0.023548578199052133, |
|
"eval_loss": 0.04718530550599098, |
|
"eval_runtime": 22.1234, |
|
"eval_samples_per_second": 4.068, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08132260947274352, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.980086224594539, |
|
"grad_norm": 0.20797890424728394, |
|
"learning_rate": 0.00043108413906603585, |
|
"loss": 0.0728, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.980086224594539, |
|
"eval_cer": 0.02384478672985782, |
|
"eval_loss": 0.0460374690592289, |
|
"eval_runtime": 22.126, |
|
"eval_samples_per_second": 4.068, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08042895442359249, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 7.082734551426812, |
|
"grad_norm": 0.21543003618717194, |
|
"learning_rate": 0.00043005554412672294, |
|
"loss": 0.0714, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 7.082734551426812, |
|
"eval_cer": 0.024881516587677725, |
|
"eval_loss": 0.04654213413596153, |
|
"eval_runtime": 22.1075, |
|
"eval_samples_per_second": 4.071, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08579088471849866, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 7.185382878259085, |
|
"grad_norm": 0.2328251153230667, |
|
"learning_rate": 0.00042902694918741004, |
|
"loss": 0.0711, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 7.185382878259085, |
|
"eval_cer": 0.02458530805687204, |
|
"eval_loss": 0.0458548367023468, |
|
"eval_runtime": 22.1559, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08489722966934764, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 7.288031205091357, |
|
"grad_norm": 0.2861992418766022, |
|
"learning_rate": 0.00042799835424809713, |
|
"loss": 0.0711, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 7.288031205091357, |
|
"eval_cer": 0.0254739336492891, |
|
"eval_loss": 0.04648789018392563, |
|
"eval_runtime": 22.2028, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08489722966934764, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 7.39067953192363, |
|
"grad_norm": 0.21449844539165497, |
|
"learning_rate": 0.0004269697593087842, |
|
"loss": 0.0709, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 7.39067953192363, |
|
"eval_cer": 0.023548578199052133, |
|
"eval_loss": 0.04555143415927887, |
|
"eval_runtime": 22.2316, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0777479892761394, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 7.493327858755903, |
|
"grad_norm": 0.21393579244613647, |
|
"learning_rate": 0.0004259411643694713, |
|
"loss": 0.0719, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 7.493327858755903, |
|
"eval_cer": 0.023548578199052133, |
|
"eval_loss": 0.04599784314632416, |
|
"eval_runtime": 22.1857, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0777479892761394, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 7.5959761855881744, |
|
"grad_norm": 0.26174065470695496, |
|
"learning_rate": 0.00042491256943015846, |
|
"loss": 0.0707, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 7.5959761855881744, |
|
"eval_cer": 0.026362559241706163, |
|
"eval_loss": 0.046083446592092514, |
|
"eval_runtime": 22.1298, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08668453976764968, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 7.698624512420448, |
|
"grad_norm": 0.23440520465373993, |
|
"learning_rate": 0.00042388397449084556, |
|
"loss": 0.0707, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 7.698624512420448, |
|
"eval_cer": 0.02502962085308057, |
|
"eval_loss": 0.046040162444114685, |
|
"eval_runtime": 22.0318, |
|
"eval_samples_per_second": 4.085, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08579088471849866, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 7.80127283925272, |
|
"grad_norm": 0.210636705160141, |
|
"learning_rate": 0.00042285537955153265, |
|
"loss": 0.0717, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 7.80127283925272, |
|
"eval_cer": 0.023252369668246446, |
|
"eval_loss": 0.044275738298892975, |
|
"eval_runtime": 22.1574, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08132260947274352, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 7.903921166084993, |
|
"grad_norm": 0.19636699557304382, |
|
"learning_rate": 0.00042182678461221974, |
|
"loss": 0.0713, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 7.903921166084993, |
|
"eval_cer": 0.025177725118483412, |
|
"eval_loss": 0.04342404752969742, |
|
"eval_runtime": 22.2249, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08221626452189455, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 8.006569492917265, |
|
"grad_norm": 0.23213474452495575, |
|
"learning_rate": 0.00042079818967290683, |
|
"loss": 0.0703, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 8.006569492917265, |
|
"eval_cer": 0.02428909952606635, |
|
"eval_loss": 0.04461972787976265, |
|
"eval_runtime": 22.2476, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08489722966934764, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 8.109217819749539, |
|
"grad_norm": 0.5032120943069458, |
|
"learning_rate": 0.00041976959473359393, |
|
"loss": 0.0692, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 8.109217819749539, |
|
"eval_cer": 0.025177725118483412, |
|
"eval_loss": 0.0442008450627327, |
|
"eval_runtime": 22.2279, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08310991957104558, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 8.21186614658181, |
|
"grad_norm": 0.2267637550830841, |
|
"learning_rate": 0.000418740999794281, |
|
"loss": 0.0694, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 8.21186614658181, |
|
"eval_cer": 0.02473341232227488, |
|
"eval_loss": 0.041503287851810455, |
|
"eval_runtime": 22.2205, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08310991957104558, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 8.314514473414084, |
|
"grad_norm": 0.22922959923744202, |
|
"learning_rate": 0.00041771240485496817, |
|
"loss": 0.0698, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 8.314514473414084, |
|
"eval_cer": 0.02132701421800948, |
|
"eval_loss": 0.0416925847530365, |
|
"eval_runtime": 22.7674, |
|
"eval_samples_per_second": 3.953, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.07506702412868632, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 8.417162800246356, |
|
"grad_norm": 0.2807318866252899, |
|
"learning_rate": 0.00041668380991565526, |
|
"loss": 0.0687, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 8.417162800246356, |
|
"eval_cer": 0.021475118483412322, |
|
"eval_loss": 0.042673755437135696, |
|
"eval_runtime": 21.9781, |
|
"eval_samples_per_second": 4.095, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07327971403038427, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 8.51981112707863, |
|
"grad_norm": 0.19773901998996735, |
|
"learning_rate": 0.00041565521497634235, |
|
"loss": 0.0697, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 8.51981112707863, |
|
"eval_cer": 0.025770142180094786, |
|
"eval_loss": 0.04353732243180275, |
|
"eval_runtime": 21.9415, |
|
"eval_samples_per_second": 4.102, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.08668453976764968, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 8.622459453910901, |
|
"grad_norm": 0.2049770951271057, |
|
"learning_rate": 0.00041462662003702945, |
|
"loss": 0.0689, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 8.622459453910901, |
|
"eval_cer": 0.02295616113744076, |
|
"eval_loss": 0.044080935418605804, |
|
"eval_runtime": 22.0767, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08042895442359249, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 8.725107780743175, |
|
"grad_norm": 0.19069017469882965, |
|
"learning_rate": 0.00041359802509771654, |
|
"loss": 0.0692, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 8.725107780743175, |
|
"eval_cer": 0.022363744075829382, |
|
"eval_loss": 0.042362380772829056, |
|
"eval_runtime": 22.0792, |
|
"eval_samples_per_second": 4.076, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07864164432529044, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 8.827756107575446, |
|
"grad_norm": 0.26284459233283997, |
|
"learning_rate": 0.00041256943015840363, |
|
"loss": 0.0694, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 8.827756107575446, |
|
"eval_cer": 0.08486374407582939, |
|
"eval_loss": 0.04263956472277641, |
|
"eval_runtime": 39.7879, |
|
"eval_samples_per_second": 2.262, |
|
"eval_steps_per_second": 0.025, |
|
"eval_wer": 0.13762287756925826, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 8.93040443440772, |
|
"grad_norm": 0.12760530412197113, |
|
"learning_rate": 0.00041154083521909073, |
|
"loss": 0.0691, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 8.93040443440772, |
|
"eval_cer": 0.02502962085308057, |
|
"eval_loss": 0.043798867613077164, |
|
"eval_runtime": 23.4939, |
|
"eval_samples_per_second": 3.831, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.08936550491510277, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 9.033052761239992, |
|
"grad_norm": 0.3002149760723114, |
|
"learning_rate": 0.0004105122402797779, |
|
"loss": 0.0689, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 9.033052761239992, |
|
"eval_cer": 0.03036137440758294, |
|
"eval_loss": 0.04081055149435997, |
|
"eval_runtime": 28.0519, |
|
"eval_samples_per_second": 3.208, |
|
"eval_steps_per_second": 0.036, |
|
"eval_wer": 0.08489722966934764, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 9.135701088072265, |
|
"grad_norm": 0.3071158230304718, |
|
"learning_rate": 0.00040948364534046497, |
|
"loss": 0.0672, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 9.135701088072265, |
|
"eval_cer": 0.028880331753554502, |
|
"eval_loss": 0.0426529198884964, |
|
"eval_runtime": 21.5055, |
|
"eval_samples_per_second": 4.185, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.08042895442359249, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 9.238349414904537, |
|
"grad_norm": 0.2852329909801483, |
|
"learning_rate": 0.00040845505040115206, |
|
"loss": 0.0672, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 9.238349414904537, |
|
"eval_cer": 0.023252369668246446, |
|
"eval_loss": 0.04157470539212227, |
|
"eval_runtime": 20.7416, |
|
"eval_samples_per_second": 4.339, |
|
"eval_steps_per_second": 0.048, |
|
"eval_wer": 0.0777479892761394, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 9.34099774173681, |
|
"grad_norm": 0.2886292338371277, |
|
"learning_rate": 0.00040742645546183915, |
|
"loss": 0.0677, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 9.34099774173681, |
|
"eval_cer": 0.029768957345971563, |
|
"eval_loss": 0.04205571115016937, |
|
"eval_runtime": 21.8561, |
|
"eval_samples_per_second": 4.118, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.08310991957104558, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 9.443646068569082, |
|
"grad_norm": 0.20502915978431702, |
|
"learning_rate": 0.00040639786052252625, |
|
"loss": 0.068, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 9.443646068569082, |
|
"eval_cer": 0.050651658767772514, |
|
"eval_loss": 0.04323223605751991, |
|
"eval_runtime": 34.4159, |
|
"eval_samples_per_second": 2.615, |
|
"eval_steps_per_second": 0.029, |
|
"eval_wer": 0.10723860589812333, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 9.546294395401356, |
|
"grad_norm": 0.31369203329086304, |
|
"learning_rate": 0.00040536926558321334, |
|
"loss": 0.0675, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 9.546294395401356, |
|
"eval_cer": 0.023548578199052133, |
|
"eval_loss": 0.040974486619234085, |
|
"eval_runtime": 21.5483, |
|
"eval_samples_per_second": 4.177, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.08042895442359249, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 9.648942722233627, |
|
"grad_norm": 0.2164764255285263, |
|
"learning_rate": 0.00040434067064390043, |
|
"loss": 0.0674, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 9.648942722233627, |
|
"eval_cer": 0.021623222748815167, |
|
"eval_loss": 0.04193877801299095, |
|
"eval_runtime": 20.5784, |
|
"eval_samples_per_second": 4.374, |
|
"eval_steps_per_second": 0.049, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 9.751591049065901, |
|
"grad_norm": 0.20598456263542175, |
|
"learning_rate": 0.0004033120757045876, |
|
"loss": 0.0683, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 9.751591049065901, |
|
"eval_cer": 0.021623222748815167, |
|
"eval_loss": 0.04229186475276947, |
|
"eval_runtime": 21.7515, |
|
"eval_samples_per_second": 4.138, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.07685433422698838, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 9.854239375898173, |
|
"grad_norm": 0.17217130959033966, |
|
"learning_rate": 0.0004022834807652747, |
|
"loss": 0.0679, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 9.854239375898173, |
|
"eval_cer": 0.02221563981042654, |
|
"eval_loss": 0.04353512451052666, |
|
"eval_runtime": 21.856, |
|
"eval_samples_per_second": 4.118, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.08042895442359249, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 9.956887702730446, |
|
"grad_norm": 0.30278000235557556, |
|
"learning_rate": 0.00040125488582596177, |
|
"loss": 0.0676, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 9.956887702730446, |
|
"eval_cer": 0.021623222748815167, |
|
"eval_loss": 0.04357661306858063, |
|
"eval_runtime": 21.958, |
|
"eval_samples_per_second": 4.099, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.07685433422698838, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 10.059536029562718, |
|
"grad_norm": 0.17858092486858368, |
|
"learning_rate": 0.00040022629088664886, |
|
"loss": 0.0659, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 10.059536029562718, |
|
"eval_cer": 0.02428909952606635, |
|
"eval_loss": 0.04361404851078987, |
|
"eval_runtime": 22.0796, |
|
"eval_samples_per_second": 4.076, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.08221626452189455, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 10.162184356394992, |
|
"grad_norm": 0.16541603207588196, |
|
"learning_rate": 0.00039919769594733595, |
|
"loss": 0.066, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 10.162184356394992, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.04267050698399544, |
|
"eval_runtime": 21.9748, |
|
"eval_samples_per_second": 4.096, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 10.264832683227263, |
|
"grad_norm": 0.16135546565055847, |
|
"learning_rate": 0.00039816910100802305, |
|
"loss": 0.066, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 10.264832683227263, |
|
"eval_cer": 0.02458530805687204, |
|
"eval_loss": 0.043835073709487915, |
|
"eval_runtime": 22.2259, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0840035746201966, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 10.367481010059535, |
|
"grad_norm": 0.3332918882369995, |
|
"learning_rate": 0.00039714050606871014, |
|
"loss": 0.0667, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 10.367481010059535, |
|
"eval_cer": 0.021919431279620854, |
|
"eval_loss": 0.04083102196455002, |
|
"eval_runtime": 22.2761, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0777479892761394, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 10.470129336891809, |
|
"grad_norm": 0.2997848093509674, |
|
"learning_rate": 0.0003961119111293973, |
|
"loss": 0.0659, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 10.470129336891809, |
|
"eval_cer": 0.021475118483412322, |
|
"eval_loss": 0.04166368022561073, |
|
"eval_runtime": 22.1637, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07327971403038427, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 10.572777663724082, |
|
"grad_norm": 0.2358590066432953, |
|
"learning_rate": 0.0003950833161900844, |
|
"loss": 0.0661, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 10.572777663724082, |
|
"eval_cer": 0.018957345971563982, |
|
"eval_loss": 0.04044094681739807, |
|
"eval_runtime": 22.1865, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 10.675425990556354, |
|
"grad_norm": 0.2147412747144699, |
|
"learning_rate": 0.0003940547212507715, |
|
"loss": 0.0662, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 10.675425990556354, |
|
"eval_cer": 0.022067535545023696, |
|
"eval_loss": 0.042136672884225845, |
|
"eval_runtime": 22.3442, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 10.778074317388626, |
|
"grad_norm": 0.21466469764709473, |
|
"learning_rate": 0.00039302612631145857, |
|
"loss": 0.066, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 10.778074317388626, |
|
"eval_cer": 0.020290284360189575, |
|
"eval_loss": 0.04227345809340477, |
|
"eval_runtime": 22.2243, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 10.8807226442209, |
|
"grad_norm": 0.20175763964653015, |
|
"learning_rate": 0.00039199753137214566, |
|
"loss": 0.0665, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 10.8807226442209, |
|
"eval_cer": 0.021919431279620854, |
|
"eval_loss": 0.04261546581983566, |
|
"eval_runtime": 22.0605, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07327971403038427, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 10.983370971053171, |
|
"grad_norm": 0.3140750229358673, |
|
"learning_rate": 0.00039096893643283275, |
|
"loss": 0.066, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 10.983370971053171, |
|
"eval_cer": 0.018957345971563982, |
|
"eval_loss": 0.040347784757614136, |
|
"eval_runtime": 23.0302, |
|
"eval_samples_per_second": 3.908, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 11.086019297885445, |
|
"grad_norm": 0.2863214313983917, |
|
"learning_rate": 0.00038994034149351985, |
|
"loss": 0.0643, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 11.086019297885445, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.040664974600076675, |
|
"eval_runtime": 22.169, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 11.188667624717716, |
|
"grad_norm": 0.1757292002439499, |
|
"learning_rate": 0.000388911746554207, |
|
"loss": 0.0644, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 11.188667624717716, |
|
"eval_cer": 0.01762440758293839, |
|
"eval_loss": 0.03967958316206932, |
|
"eval_runtime": 22.6062, |
|
"eval_samples_per_second": 3.981, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 11.29131595154999, |
|
"grad_norm": 0.25020548701286316, |
|
"learning_rate": 0.0003878831516148941, |
|
"loss": 0.0646, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 11.29131595154999, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.0391419492661953, |
|
"eval_runtime": 21.9075, |
|
"eval_samples_per_second": 4.108, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 11.393964278382262, |
|
"grad_norm": 0.2499699741601944, |
|
"learning_rate": 0.0003868545566755812, |
|
"loss": 0.0643, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 11.393964278382262, |
|
"eval_cer": 0.02058649289099526, |
|
"eval_loss": 0.04037711024284363, |
|
"eval_runtime": 22.1604, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 11.496612605214535, |
|
"grad_norm": 0.16554132103919983, |
|
"learning_rate": 0.0003858259617362683, |
|
"loss": 0.0649, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 11.496612605214535, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.04021435230970383, |
|
"eval_runtime": 22.1316, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07327971403038427, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 11.599260932046807, |
|
"grad_norm": 0.18748946487903595, |
|
"learning_rate": 0.00038479736679695537, |
|
"loss": 0.0655, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 11.599260932046807, |
|
"eval_cer": 0.022511848341232227, |
|
"eval_loss": 0.040546808391809464, |
|
"eval_runtime": 22.1622, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07596067917783736, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 11.70190925887908, |
|
"grad_norm": 0.4686923623085022, |
|
"learning_rate": 0.00038376877185764246, |
|
"loss": 0.0647, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 11.70190925887908, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.04154360666871071, |
|
"eval_runtime": 22.0458, |
|
"eval_samples_per_second": 4.082, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0679177837354781, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 11.804557585711352, |
|
"grad_norm": 0.20417150855064392, |
|
"learning_rate": 0.00038274017691832955, |
|
"loss": 0.0649, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 11.804557585711352, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.04089859500527382, |
|
"eval_runtime": 22.0112, |
|
"eval_samples_per_second": 4.089, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 11.907205912543626, |
|
"grad_norm": 0.27801695466041565, |
|
"learning_rate": 0.0003817115819790167, |
|
"loss": 0.0643, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 11.907205912543626, |
|
"eval_cer": 0.020438388625592416, |
|
"eval_loss": 0.04090258479118347, |
|
"eval_runtime": 22.1223, |
|
"eval_samples_per_second": 4.068, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 12.009854239375898, |
|
"grad_norm": 0.24761049449443817, |
|
"learning_rate": 0.0003806829870397038, |
|
"loss": 0.0641, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 12.009854239375898, |
|
"eval_cer": 0.02103080568720379, |
|
"eval_loss": 0.041068486869335175, |
|
"eval_runtime": 22.0016, |
|
"eval_samples_per_second": 4.091, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 12.112502566208171, |
|
"grad_norm": 0.18434394896030426, |
|
"learning_rate": 0.0003796543921003909, |
|
"loss": 0.0625, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 12.112502566208171, |
|
"eval_cer": 0.020438388625592416, |
|
"eval_loss": 0.04070517420768738, |
|
"eval_runtime": 22.1605, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 12.215150893040443, |
|
"grad_norm": 0.24979180097579956, |
|
"learning_rate": 0.000378625797161078, |
|
"loss": 0.0629, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 12.215150893040443, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.038933165371418, |
|
"eval_runtime": 22.1583, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 12.317799219872716, |
|
"grad_norm": 0.29532870650291443, |
|
"learning_rate": 0.0003775972022217651, |
|
"loss": 0.0634, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 12.317799219872716, |
|
"eval_cer": 0.020438388625592416, |
|
"eval_loss": 0.04176652058959007, |
|
"eval_runtime": 22.5206, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.07506702412868632, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 12.420447546704988, |
|
"grad_norm": 0.2764800190925598, |
|
"learning_rate": 0.00037656860728245217, |
|
"loss": 0.0628, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 12.420447546704988, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.038935501128435135, |
|
"eval_runtime": 22.4914, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 12.523095873537262, |
|
"grad_norm": 0.20742572844028473, |
|
"learning_rate": 0.00037554001234313926, |
|
"loss": 0.0638, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 12.523095873537262, |
|
"eval_cer": 0.020734597156398103, |
|
"eval_loss": 0.040505990386009216, |
|
"eval_runtime": 22.4563, |
|
"eval_samples_per_second": 4.008, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 12.625744200369533, |
|
"grad_norm": 0.22677256166934967, |
|
"learning_rate": 0.0003745114174038264, |
|
"loss": 0.063, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 12.625744200369533, |
|
"eval_cer": 0.02058649289099526, |
|
"eval_loss": 0.0383678562939167, |
|
"eval_runtime": 22.3288, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 12.728392527201807, |
|
"grad_norm": 0.2105027288198471, |
|
"learning_rate": 0.0003734828224645135, |
|
"loss": 0.0633, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 12.728392527201807, |
|
"eval_cer": 0.01925355450236967, |
|
"eval_loss": 0.03879451006650925, |
|
"eval_runtime": 22.3197, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 12.831040854034079, |
|
"grad_norm": 0.21093320846557617, |
|
"learning_rate": 0.0003724542275252006, |
|
"loss": 0.0636, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 12.831040854034079, |
|
"eval_cer": 0.020734597156398103, |
|
"eval_loss": 0.038905300199985504, |
|
"eval_runtime": 22.1791, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 12.933689180866352, |
|
"grad_norm": 0.21501018106937408, |
|
"learning_rate": 0.0003714256325858877, |
|
"loss": 0.0634, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 12.933689180866352, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.03828004002571106, |
|
"eval_runtime": 22.175, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 13.036337507698624, |
|
"grad_norm": 0.2869448661804199, |
|
"learning_rate": 0.0003703970376465748, |
|
"loss": 0.0623, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 13.036337507698624, |
|
"eval_cer": 0.02221563981042654, |
|
"eval_loss": 0.03908955305814743, |
|
"eval_runtime": 22.0973, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 13.138985834530898, |
|
"grad_norm": 0.20509861409664154, |
|
"learning_rate": 0.00036936844270726187, |
|
"loss": 0.0616, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 13.138985834530898, |
|
"eval_cer": 0.021919431279620854, |
|
"eval_loss": 0.0386139452457428, |
|
"eval_runtime": 22.2024, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 13.24163416136317, |
|
"grad_norm": 0.17765522003173828, |
|
"learning_rate": 0.00036833984776794897, |
|
"loss": 0.0618, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 13.24163416136317, |
|
"eval_cer": 0.01925355450236967, |
|
"eval_loss": 0.0378284677863121, |
|
"eval_runtime": 22.0041, |
|
"eval_samples_per_second": 4.09, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 13.344282488195443, |
|
"grad_norm": 0.24877817928791046, |
|
"learning_rate": 0.0003673112528286361, |
|
"loss": 0.0619, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 13.344282488195443, |
|
"eval_cer": 0.021475118483412322, |
|
"eval_loss": 0.03674837946891785, |
|
"eval_runtime": 22.0962, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 13.446930815027715, |
|
"grad_norm": 0.25357529520988464, |
|
"learning_rate": 0.0003662826578893232, |
|
"loss": 0.0623, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 13.446930815027715, |
|
"eval_cer": 0.019994075829383885, |
|
"eval_loss": 0.03821048513054848, |
|
"eval_runtime": 22.1382, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 13.549579141859988, |
|
"grad_norm": 0.32752060890197754, |
|
"learning_rate": 0.0003652540629500103, |
|
"loss": 0.0622, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 13.549579141859988, |
|
"eval_cer": 0.02014218009478673, |
|
"eval_loss": 0.038477059453725815, |
|
"eval_runtime": 22.2355, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 13.65222746869226, |
|
"grad_norm": 0.22504030168056488, |
|
"learning_rate": 0.0003642254680106974, |
|
"loss": 0.0622, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 13.65222746869226, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.038305170834064484, |
|
"eval_runtime": 22.2134, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 13.754875795524534, |
|
"grad_norm": 0.23793594539165497, |
|
"learning_rate": 0.0003631968730713845, |
|
"loss": 0.062, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 13.754875795524534, |
|
"eval_cer": 0.02058649289099526, |
|
"eval_loss": 0.03838730975985527, |
|
"eval_runtime": 22.3142, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0679177837354781, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 13.857524122356805, |
|
"grad_norm": 0.18214410543441772, |
|
"learning_rate": 0.0003621682781320716, |
|
"loss": 0.0625, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 13.857524122356805, |
|
"eval_cer": 0.02014218009478673, |
|
"eval_loss": 0.03858475759625435, |
|
"eval_runtime": 22.3822, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 13.960172449189079, |
|
"grad_norm": 0.24008499085903168, |
|
"learning_rate": 0.00036113968319275867, |
|
"loss": 0.0621, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 13.960172449189079, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.03749372810125351, |
|
"eval_runtime": 22.3573, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 14.06282077602135, |
|
"grad_norm": 0.20032504200935364, |
|
"learning_rate": 0.0003601110882534458, |
|
"loss": 0.0616, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 14.06282077602135, |
|
"eval_cer": 0.02058649289099526, |
|
"eval_loss": 0.03601989150047302, |
|
"eval_runtime": 22.1524, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 14.165469102853624, |
|
"grad_norm": 0.29345226287841797, |
|
"learning_rate": 0.0003590824933141329, |
|
"loss": 0.0611, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 14.165469102853624, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.036261823028326035, |
|
"eval_runtime": 22.1903, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 14.268117429685896, |
|
"grad_norm": 0.19695940613746643, |
|
"learning_rate": 0.00035805389837482, |
|
"loss": 0.0606, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 14.268117429685896, |
|
"eval_cer": 0.021623222748815167, |
|
"eval_loss": 0.0368872731924057, |
|
"eval_runtime": 22.1734, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07238605898123325, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 14.37076575651817, |
|
"grad_norm": 0.1869696080684662, |
|
"learning_rate": 0.0003570253034355071, |
|
"loss": 0.0609, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 14.37076575651817, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.036232832819223404, |
|
"eval_runtime": 22.2094, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 14.473414083350441, |
|
"grad_norm": 0.3025355041027069, |
|
"learning_rate": 0.0003559967084961942, |
|
"loss": 0.0611, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 14.473414083350441, |
|
"eval_cer": 0.02088270142180095, |
|
"eval_loss": 0.036524925380945206, |
|
"eval_runtime": 22.0892, |
|
"eval_samples_per_second": 4.074, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 14.576062410182715, |
|
"grad_norm": 0.2218203842639923, |
|
"learning_rate": 0.0003549681135568813, |
|
"loss": 0.0612, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 14.576062410182715, |
|
"eval_cer": 0.02058649289099526, |
|
"eval_loss": 0.03738318383693695, |
|
"eval_runtime": 22.4443, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 14.678710737014987, |
|
"grad_norm": 0.18811815977096558, |
|
"learning_rate": 0.0003539395186175684, |
|
"loss": 0.0606, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 14.678710737014987, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.03684472665190697, |
|
"eval_runtime": 22.3128, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07238605898123325, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 14.78135906384726, |
|
"grad_norm": 0.20071063935756683, |
|
"learning_rate": 0.0003529109236782555, |
|
"loss": 0.0609, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 14.78135906384726, |
|
"eval_cer": 0.018364928909952605, |
|
"eval_loss": 0.037738535553216934, |
|
"eval_runtime": 22.5071, |
|
"eval_samples_per_second": 3.999, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.064343163538874, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 14.884007390679532, |
|
"grad_norm": 0.2046099305152893, |
|
"learning_rate": 0.0003518823287389426, |
|
"loss": 0.0614, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 14.884007390679532, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.03707651048898697, |
|
"eval_runtime": 22.4459, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 14.986655717511805, |
|
"grad_norm": 0.27886244654655457, |
|
"learning_rate": 0.0003508537337996297, |
|
"loss": 0.0616, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 14.986655717511805, |
|
"eval_cer": 0.022511848341232227, |
|
"eval_loss": 0.038485873490571976, |
|
"eval_runtime": 22.2607, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07685433422698838, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 15.089304044344077, |
|
"grad_norm": 0.3120444118976593, |
|
"learning_rate": 0.0003498251388603168, |
|
"loss": 0.0598, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 15.089304044344077, |
|
"eval_cer": 0.021475118483412322, |
|
"eval_loss": 0.03692319989204407, |
|
"eval_runtime": 22.2577, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07685433422698838, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 15.19195237117635, |
|
"grad_norm": 0.2510223388671875, |
|
"learning_rate": 0.0003487965439210039, |
|
"loss": 0.06, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 15.19195237117635, |
|
"eval_cer": 0.019549763033175356, |
|
"eval_loss": 0.036081377416849136, |
|
"eval_runtime": 22.3708, |
|
"eval_samples_per_second": 4.023, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06881143878462913, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 15.294600698008622, |
|
"grad_norm": 0.2539554536342621, |
|
"learning_rate": 0.000347767948981691, |
|
"loss": 0.0603, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 15.294600698008622, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.036284659057855606, |
|
"eval_runtime": 22.2203, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 15.397249024840896, |
|
"grad_norm": 0.3537413775920868, |
|
"learning_rate": 0.0003467393540423781, |
|
"loss": 0.0602, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 15.397249024840896, |
|
"eval_cer": 0.01925355450236967, |
|
"eval_loss": 0.037329014390707016, |
|
"eval_runtime": 22.3303, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0679177837354781, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 15.499897351673168, |
|
"grad_norm": 0.31098708510398865, |
|
"learning_rate": 0.00034571075910306523, |
|
"loss": 0.0599, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 15.499897351673168, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.0374312698841095, |
|
"eval_runtime": 22.4269, |
|
"eval_samples_per_second": 4.013, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 15.60254567850544, |
|
"grad_norm": 0.18067589402198792, |
|
"learning_rate": 0.0003446821641637523, |
|
"loss": 0.0603, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 15.60254567850544, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.03638828173279762, |
|
"eval_runtime": 22.5221, |
|
"eval_samples_per_second": 3.996, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 15.705194005337713, |
|
"grad_norm": 0.26867741346359253, |
|
"learning_rate": 0.0003436535692244394, |
|
"loss": 0.0605, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 15.705194005337713, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.03720884397625923, |
|
"eval_runtime": 22.3098, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 15.807842332169985, |
|
"grad_norm": 0.18743161857128143, |
|
"learning_rate": 0.0003426249742851265, |
|
"loss": 0.06, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 15.807842332169985, |
|
"eval_cer": 0.021919431279620854, |
|
"eval_loss": 0.03767675533890724, |
|
"eval_runtime": 22.2871, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0741733690795353, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 15.910490659002258, |
|
"grad_norm": 0.2153768688440323, |
|
"learning_rate": 0.0003415963793458136, |
|
"loss": 0.0599, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 15.910490659002258, |
|
"eval_cer": 0.020290284360189575, |
|
"eval_loss": 0.03886905685067177, |
|
"eval_runtime": 22.1149, |
|
"eval_samples_per_second": 4.07, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07149240393208221, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 16.01313898583453, |
|
"grad_norm": 0.24325600266456604, |
|
"learning_rate": 0.0003405677844065007, |
|
"loss": 0.0598, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 16.01313898583453, |
|
"eval_cer": 0.02177132701421801, |
|
"eval_loss": 0.03822890669107437, |
|
"eval_runtime": 22.2699, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 16.115787312666804, |
|
"grad_norm": 0.2438814342021942, |
|
"learning_rate": 0.0003395391894671878, |
|
"loss": 0.0586, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 16.115787312666804, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.038069114089012146, |
|
"eval_runtime": 22.2427, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 16.218435639499077, |
|
"grad_norm": 0.22072641551494598, |
|
"learning_rate": 0.00033851059452787494, |
|
"loss": 0.0584, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 16.218435639499077, |
|
"eval_cer": 0.0173281990521327, |
|
"eval_loss": 0.03663622587919235, |
|
"eval_runtime": 22.2026, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 16.321083966331347, |
|
"grad_norm": 0.15790699422359467, |
|
"learning_rate": 0.00033748199958856203, |
|
"loss": 0.0587, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 16.321083966331347, |
|
"eval_cer": 0.019549763033175356, |
|
"eval_loss": 0.03735322132706642, |
|
"eval_runtime": 22.315, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 16.42373229316362, |
|
"grad_norm": 0.1823723465204239, |
|
"learning_rate": 0.0003364534046492491, |
|
"loss": 0.0591, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 16.42373229316362, |
|
"eval_cer": 0.021178909952606635, |
|
"eval_loss": 0.03700649365782738, |
|
"eval_runtime": 22.6895, |
|
"eval_samples_per_second": 3.967, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 16.526380619995894, |
|
"grad_norm": 0.24684032797813416, |
|
"learning_rate": 0.0003354248097099362, |
|
"loss": 0.0595, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 16.526380619995894, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.03638649359345436, |
|
"eval_runtime": 22.3004, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 16.629028946828168, |
|
"grad_norm": 0.2088789939880371, |
|
"learning_rate": 0.0003343962147706233, |
|
"loss": 0.059, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 16.629028946828168, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03632904216647148, |
|
"eval_runtime": 22.2253, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 16.731677273660438, |
|
"grad_norm": 0.20426543056964874, |
|
"learning_rate": 0.0003333676198313104, |
|
"loss": 0.0598, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 16.731677273660438, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.03629469498991966, |
|
"eval_runtime": 22.3453, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 16.83432560049271, |
|
"grad_norm": 0.2737009525299072, |
|
"learning_rate": 0.0003323390248919975, |
|
"loss": 0.0591, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 16.83432560049271, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03445509076118469, |
|
"eval_runtime": 22.3887, |
|
"eval_samples_per_second": 4.02, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 16.936973927324985, |
|
"grad_norm": 0.33581215143203735, |
|
"learning_rate": 0.00033131042995268465, |
|
"loss": 0.0586, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 16.936973927324985, |
|
"eval_cer": 0.020290284360189575, |
|
"eval_loss": 0.036238256841897964, |
|
"eval_runtime": 22.1998, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 17.03962225415726, |
|
"grad_norm": 0.277771532535553, |
|
"learning_rate": 0.00033028183501337174, |
|
"loss": 0.0591, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 17.03962225415726, |
|
"eval_cer": 0.01806872037914692, |
|
"eval_loss": 0.035338886082172394, |
|
"eval_runtime": 22.132, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 17.14227058098953, |
|
"grad_norm": 0.17568770051002502, |
|
"learning_rate": 0.00032925324007405883, |
|
"loss": 0.0578, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 17.14227058098953, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03632746636867523, |
|
"eval_runtime": 22.2554, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 17.244918907821802, |
|
"grad_norm": 0.22311237454414368, |
|
"learning_rate": 0.0003282246451347459, |
|
"loss": 0.0576, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 17.244918907821802, |
|
"eval_cer": 0.01925355450236967, |
|
"eval_loss": 0.036676324903964996, |
|
"eval_runtime": 22.484, |
|
"eval_samples_per_second": 4.003, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 17.347567234654075, |
|
"grad_norm": 0.3649640381336212, |
|
"learning_rate": 0.000327196050195433, |
|
"loss": 0.0576, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 17.347567234654075, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.035018790513277054, |
|
"eval_runtime": 22.2563, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 17.45021556148635, |
|
"grad_norm": 0.2809694707393646, |
|
"learning_rate": 0.0003261674552561201, |
|
"loss": 0.0582, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 17.45021556148635, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.035496581345796585, |
|
"eval_runtime": 22.1719, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 17.55286388831862, |
|
"grad_norm": 0.23279882967472076, |
|
"learning_rate": 0.0003251388603168072, |
|
"loss": 0.0587, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 17.55286388831862, |
|
"eval_cer": 0.018364928909952605, |
|
"eval_loss": 0.03641456365585327, |
|
"eval_runtime": 22.1562, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 17.655512215150893, |
|
"grad_norm": 0.23531590402126312, |
|
"learning_rate": 0.00032411026537749435, |
|
"loss": 0.0587, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 17.655512215150893, |
|
"eval_cer": 0.018216824644549764, |
|
"eval_loss": 0.034297019243240356, |
|
"eval_runtime": 22.3222, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 17.758160541983166, |
|
"grad_norm": 0.38004380464553833, |
|
"learning_rate": 0.00032308167043818144, |
|
"loss": 0.0589, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 17.758160541983166, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03497479110956192, |
|
"eval_runtime": 22.1444, |
|
"eval_samples_per_second": 4.064, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 17.86080886881544, |
|
"grad_norm": 0.22771824896335602, |
|
"learning_rate": 0.00032205307549886854, |
|
"loss": 0.0578, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 17.86080886881544, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.034568045288324356, |
|
"eval_runtime": 22.2545, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 17.96345719564771, |
|
"grad_norm": 0.3113113343715668, |
|
"learning_rate": 0.00032102448055955563, |
|
"loss": 0.0586, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 17.96345719564771, |
|
"eval_cer": 0.01851303317535545, |
|
"eval_loss": 0.035004787147045135, |
|
"eval_runtime": 22.3269, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 18.066105522479983, |
|
"grad_norm": 0.24952055513858795, |
|
"learning_rate": 0.0003199958856202427, |
|
"loss": 0.0571, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 18.066105522479983, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.03602117672562599, |
|
"eval_runtime": 22.4032, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 18.168753849312257, |
|
"grad_norm": 0.33917441964149475, |
|
"learning_rate": 0.0003189672906809298, |
|
"loss": 0.0569, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 18.168753849312257, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.036349765956401825, |
|
"eval_runtime": 22.273, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 18.27140217614453, |
|
"grad_norm": 0.18371905386447906, |
|
"learning_rate": 0.0003179386957416169, |
|
"loss": 0.0566, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 18.27140217614453, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.03550698608160019, |
|
"eval_runtime": 22.3312, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 18.3740505029768, |
|
"grad_norm": 0.2601664662361145, |
|
"learning_rate": 0.00031691010080230406, |
|
"loss": 0.057, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 18.3740505029768, |
|
"eval_cer": 0.018957345971563982, |
|
"eval_loss": 0.035991400480270386, |
|
"eval_runtime": 22.2432, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 18.476698829809074, |
|
"grad_norm": 0.17889824509620667, |
|
"learning_rate": 0.00031588150586299115, |
|
"loss": 0.057, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 18.476698829809074, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.03573347255587578, |
|
"eval_runtime": 22.2321, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 18.579347156641347, |
|
"grad_norm": 0.5736141800880432, |
|
"learning_rate": 0.00031485291092367824, |
|
"loss": 0.0578, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 18.579347156641347, |
|
"eval_cer": 0.01851303317535545, |
|
"eval_loss": 0.035990502685308456, |
|
"eval_runtime": 22.2434, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 18.68199548347362, |
|
"grad_norm": 0.2320089340209961, |
|
"learning_rate": 0.00031382431598436534, |
|
"loss": 0.0581, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 18.68199548347362, |
|
"eval_cer": 0.0173281990521327, |
|
"eval_loss": 0.035308029502630234, |
|
"eval_runtime": 22.161, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 18.78464381030589, |
|
"grad_norm": 0.31181567907333374, |
|
"learning_rate": 0.00031279572104505243, |
|
"loss": 0.0575, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 18.78464381030589, |
|
"eval_cer": 0.017920616113744077, |
|
"eval_loss": 0.03400159254670143, |
|
"eval_runtime": 22.1991, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 18.887292137138164, |
|
"grad_norm": 0.26530441641807556, |
|
"learning_rate": 0.0003117671261057395, |
|
"loss": 0.0574, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 18.887292137138164, |
|
"eval_cer": 0.020290284360189575, |
|
"eval_loss": 0.03510544076561928, |
|
"eval_runtime": 22.2589, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 18.989940463970438, |
|
"grad_norm": 0.2680375576019287, |
|
"learning_rate": 0.0003107385311664266, |
|
"loss": 0.0576, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 18.989940463970438, |
|
"eval_cer": 0.021178909952606635, |
|
"eval_loss": 0.037547577172517776, |
|
"eval_runtime": 22.2321, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07238605898123325, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 19.09258879080271, |
|
"grad_norm": 0.2254854440689087, |
|
"learning_rate": 0.00030970993622711376, |
|
"loss": 0.0561, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 19.09258879080271, |
|
"eval_cer": 0.01940165876777251, |
|
"eval_loss": 0.03498771786689758, |
|
"eval_runtime": 22.2675, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 19.19523711763498, |
|
"grad_norm": 0.2187499850988388, |
|
"learning_rate": 0.00030868134128780086, |
|
"loss": 0.0563, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 19.19523711763498, |
|
"eval_cer": 0.020734597156398103, |
|
"eval_loss": 0.035063955932855606, |
|
"eval_runtime": 22.2231, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 19.297885444467255, |
|
"grad_norm": 0.3217693567276001, |
|
"learning_rate": 0.000307652746348488, |
|
"loss": 0.0566, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 19.297885444467255, |
|
"eval_cer": 0.020734597156398103, |
|
"eval_loss": 0.034946467727422714, |
|
"eval_runtime": 22.2783, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0679177837354781, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 19.40053377129953, |
|
"grad_norm": 0.2570216655731201, |
|
"learning_rate": 0.0003066241514091751, |
|
"loss": 0.056, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 19.40053377129953, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.03498660773038864, |
|
"eval_runtime": 22.3189, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0679177837354781, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 19.503182098131802, |
|
"grad_norm": 0.24969050288200378, |
|
"learning_rate": 0.0003055955564698622, |
|
"loss": 0.0565, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 19.503182098131802, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.034704625606536865, |
|
"eval_runtime": 22.2267, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 19.605830424964072, |
|
"grad_norm": 0.19874796271324158, |
|
"learning_rate": 0.0003045669615305493, |
|
"loss": 0.0565, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 19.605830424964072, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03414672613143921, |
|
"eval_runtime": 22.4133, |
|
"eval_samples_per_second": 4.015, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 19.708478751796346, |
|
"grad_norm": 0.21598631143569946, |
|
"learning_rate": 0.0003035383665912364, |
|
"loss": 0.0569, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 19.708478751796346, |
|
"eval_cer": 0.02088270142180095, |
|
"eval_loss": 0.03441624715924263, |
|
"eval_runtime": 22.4978, |
|
"eval_samples_per_second": 4.0, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.0679177837354781, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 19.81112707862862, |
|
"grad_norm": 0.740821361541748, |
|
"learning_rate": 0.0003025097716519235, |
|
"loss": 0.0566, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 19.81112707862862, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.03505128249526024, |
|
"eval_runtime": 22.2297, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 19.913775405460893, |
|
"grad_norm": 0.25967320799827576, |
|
"learning_rate": 0.0003014811767126106, |
|
"loss": 0.0566, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 19.913775405460893, |
|
"eval_cer": 0.02088270142180095, |
|
"eval_loss": 0.03540065139532089, |
|
"eval_runtime": 22.328, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07327971403038427, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 20.016423732293163, |
|
"grad_norm": 0.2352762520313263, |
|
"learning_rate": 0.0003004525817732977, |
|
"loss": 0.0563, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 20.016423732293163, |
|
"eval_cer": 0.021178909952606635, |
|
"eval_loss": 0.0357496440410614, |
|
"eval_runtime": 22.3495, |
|
"eval_samples_per_second": 4.027, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.07059874888293119, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 20.119072059125436, |
|
"grad_norm": 0.31662923097610474, |
|
"learning_rate": 0.0002994239868339848, |
|
"loss": 0.0556, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 20.119072059125436, |
|
"eval_cer": 0.019697867298578198, |
|
"eval_loss": 0.03499302640557289, |
|
"eval_runtime": 22.3332, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 20.22172038595771, |
|
"grad_norm": 0.16036640107631683, |
|
"learning_rate": 0.0002983953918946719, |
|
"loss": 0.0556, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 20.22172038595771, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.03571586683392525, |
|
"eval_runtime": 22.3554, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06970509383378017, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 20.324368712789983, |
|
"grad_norm": 0.18146023154258728, |
|
"learning_rate": 0.000297366796955359, |
|
"loss": 0.0556, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 20.324368712789983, |
|
"eval_cer": 0.01851303317535545, |
|
"eval_loss": 0.03470243141055107, |
|
"eval_runtime": 22.2669, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 20.427017039622253, |
|
"grad_norm": 0.18593771755695343, |
|
"learning_rate": 0.0002963382020160461, |
|
"loss": 0.0553, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 20.427017039622253, |
|
"eval_cer": 0.01762440758293839, |
|
"eval_loss": 0.034099601209163666, |
|
"eval_runtime": 22.1758, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 20.529665366454527, |
|
"grad_norm": 0.16802427172660828, |
|
"learning_rate": 0.00029530960707673323, |
|
"loss": 0.0558, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 20.529665366454527, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.034865912050008774, |
|
"eval_runtime": 22.2605, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 20.6323136932868, |
|
"grad_norm": 0.20501871407032013, |
|
"learning_rate": 0.0002942810121374203, |
|
"loss": 0.0559, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 20.6323136932868, |
|
"eval_cer": 0.018957345971563982, |
|
"eval_loss": 0.03544703871011734, |
|
"eval_runtime": 22.0876, |
|
"eval_samples_per_second": 4.075, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 20.73496202011907, |
|
"grad_norm": 0.19251494109630585, |
|
"learning_rate": 0.0002932524171981074, |
|
"loss": 0.0555, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 20.73496202011907, |
|
"eval_cer": 0.01851303317535545, |
|
"eval_loss": 0.03462394326925278, |
|
"eval_runtime": 22.1969, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 20.837610346951344, |
|
"grad_norm": 0.12611526250839233, |
|
"learning_rate": 0.0002922238222587945, |
|
"loss": 0.0558, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 20.837610346951344, |
|
"eval_cer": 0.017920616113744077, |
|
"eval_loss": 0.03471866995096207, |
|
"eval_runtime": 22.9419, |
|
"eval_samples_per_second": 3.923, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 20.940258673783617, |
|
"grad_norm": 0.23571810126304626, |
|
"learning_rate": 0.0002911952273194816, |
|
"loss": 0.0553, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 20.940258673783617, |
|
"eval_cer": 0.016735781990521326, |
|
"eval_loss": 0.033052537590265274, |
|
"eval_runtime": 22.2403, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 21.04290700061589, |
|
"grad_norm": 0.16654832661151886, |
|
"learning_rate": 0.0002901666323801687, |
|
"loss": 0.0552, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 21.04290700061589, |
|
"eval_cer": 0.017920616113744077, |
|
"eval_loss": 0.03388543054461479, |
|
"eval_runtime": 22.3538, |
|
"eval_samples_per_second": 4.026, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 21.14555532744816, |
|
"grad_norm": 0.30480700731277466, |
|
"learning_rate": 0.0002891380374408558, |
|
"loss": 0.054, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 21.14555532744816, |
|
"eval_cer": 0.017180094786729858, |
|
"eval_loss": 0.03281432017683983, |
|
"eval_runtime": 22.4896, |
|
"eval_samples_per_second": 4.002, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 21.248203654280434, |
|
"grad_norm": 0.22593766450881958, |
|
"learning_rate": 0.00028810944250154294, |
|
"loss": 0.0544, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 21.248203654280434, |
|
"eval_cer": 0.017920616113744077, |
|
"eval_loss": 0.03340643644332886, |
|
"eval_runtime": 22.4028, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 21.350851981112708, |
|
"grad_norm": 0.1935175508260727, |
|
"learning_rate": 0.00028708084756223003, |
|
"loss": 0.0549, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 21.350851981112708, |
|
"eval_cer": 0.018216824644549764, |
|
"eval_loss": 0.03223665431141853, |
|
"eval_runtime": 22.4082, |
|
"eval_samples_per_second": 4.016, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 21.45350030794498, |
|
"grad_norm": 0.18650276958942413, |
|
"learning_rate": 0.0002860522526229171, |
|
"loss": 0.0549, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 21.45350030794498, |
|
"eval_cer": 0.01762440758293839, |
|
"eval_loss": 0.0338866226375103, |
|
"eval_runtime": 22.2564, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 21.55614863477725, |
|
"grad_norm": 0.2135065495967865, |
|
"learning_rate": 0.0002850236576836042, |
|
"loss": 0.055, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 21.55614863477725, |
|
"eval_cer": 0.019845971563981043, |
|
"eval_loss": 0.033525411039590836, |
|
"eval_runtime": 22.1326, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 21.658796961609525, |
|
"grad_norm": 0.21206118166446686, |
|
"learning_rate": 0.0002839950627442913, |
|
"loss": 0.0547, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 21.658796961609525, |
|
"eval_cer": 0.01940165876777251, |
|
"eval_loss": 0.033743493258953094, |
|
"eval_runtime": 22.1212, |
|
"eval_samples_per_second": 4.068, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 21.7614452884418, |
|
"grad_norm": 0.41899242997169495, |
|
"learning_rate": 0.0002829664678049784, |
|
"loss": 0.0551, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 21.7614452884418, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.03284008055925369, |
|
"eval_runtime": 22.3424, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 21.864093615274072, |
|
"grad_norm": 0.16063542664051056, |
|
"learning_rate": 0.0002819378728656655, |
|
"loss": 0.0547, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 21.864093615274072, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.03267466276884079, |
|
"eval_runtime": 22.247, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 21.966741942106342, |
|
"grad_norm": 0.3092317283153534, |
|
"learning_rate": 0.00028090927792635264, |
|
"loss": 0.055, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 21.966741942106342, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.032835327088832855, |
|
"eval_runtime": 22.1659, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 22.069390268938616, |
|
"grad_norm": 0.21150179207324982, |
|
"learning_rate": 0.00027988068298703974, |
|
"loss": 0.0544, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 22.069390268938616, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.03344175964593887, |
|
"eval_runtime": 22.2595, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 22.17203859577089, |
|
"grad_norm": 0.7867759466171265, |
|
"learning_rate": 0.00027885208804772683, |
|
"loss": 0.0535, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 22.17203859577089, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.03438210114836693, |
|
"eval_runtime": 22.4442, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 22.274686922603163, |
|
"grad_norm": 0.37553030252456665, |
|
"learning_rate": 0.0002778234931084139, |
|
"loss": 0.0546, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 22.274686922603163, |
|
"eval_cer": 0.018216824644549764, |
|
"eval_loss": 0.033362455666065216, |
|
"eval_runtime": 22.2715, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 22.377335249435433, |
|
"grad_norm": 0.29263338446617126, |
|
"learning_rate": 0.000276794898169101, |
|
"loss": 0.0537, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 22.377335249435433, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03228195384144783, |
|
"eval_runtime": 22.3277, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 22.479983576267706, |
|
"grad_norm": 0.25038591027259827, |
|
"learning_rate": 0.0002757663032297881, |
|
"loss": 0.0533, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 22.479983576267706, |
|
"eval_cer": 0.018957345971563982, |
|
"eval_loss": 0.033720944076776505, |
|
"eval_runtime": 22.5305, |
|
"eval_samples_per_second": 3.995, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 22.58263190309998, |
|
"grad_norm": 0.1750280261039734, |
|
"learning_rate": 0.0002747377082904752, |
|
"loss": 0.055, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 22.58263190309998, |
|
"eval_cer": 0.016735781990521326, |
|
"eval_loss": 0.03310966119170189, |
|
"eval_runtime": 22.4023, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 22.685280229932253, |
|
"grad_norm": 0.24312840402126312, |
|
"learning_rate": 0.00027370911335116235, |
|
"loss": 0.0541, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 22.685280229932253, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.03354882076382637, |
|
"eval_runtime": 22.1565, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 22.787928556764523, |
|
"grad_norm": 0.31420522928237915, |
|
"learning_rate": 0.00027268051841184944, |
|
"loss": 0.0541, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 22.787928556764523, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.03195945546030998, |
|
"eval_runtime": 22.1178, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 22.890576883596797, |
|
"grad_norm": 0.5160701870918274, |
|
"learning_rate": 0.00027165192347253654, |
|
"loss": 0.0544, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 22.890576883596797, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.0328957661986351, |
|
"eval_runtime": 22.2935, |
|
"eval_samples_per_second": 4.037, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 22.99322521042907, |
|
"grad_norm": 0.18564113974571228, |
|
"learning_rate": 0.00027062332853322363, |
|
"loss": 0.0538, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 22.99322521042907, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.033330611884593964, |
|
"eval_runtime": 22.2627, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 23.095873537261344, |
|
"grad_norm": 0.19497232139110565, |
|
"learning_rate": 0.0002695947335939107, |
|
"loss": 0.0527, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 23.095873537261344, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.03324893116950989, |
|
"eval_runtime": 22.2973, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 23.198521864093614, |
|
"grad_norm": 0.24956081807613373, |
|
"learning_rate": 0.0002685661386545978, |
|
"loss": 0.0529, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 23.198521864093614, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03351821005344391, |
|
"eval_runtime": 22.2956, |
|
"eval_samples_per_second": 4.037, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 23.301170190925887, |
|
"grad_norm": 0.1977517157793045, |
|
"learning_rate": 0.00026753754371528496, |
|
"loss": 0.0532, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 23.301170190925887, |
|
"eval_cer": 0.017920616113744077, |
|
"eval_loss": 0.03354490175843239, |
|
"eval_runtime": 22.2726, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 23.40381851775816, |
|
"grad_norm": 0.23078420758247375, |
|
"learning_rate": 0.00026650894877597206, |
|
"loss": 0.0537, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 23.40381851775816, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.03308222442865372, |
|
"eval_runtime": 22.3431, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 23.506466844590435, |
|
"grad_norm": 0.3596203029155731, |
|
"learning_rate": 0.00026548035383665915, |
|
"loss": 0.0534, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 23.506466844590435, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.03161655366420746, |
|
"eval_runtime": 22.3127, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 23.609115171422705, |
|
"grad_norm": 0.26955559849739075, |
|
"learning_rate": 0.00026445175889734624, |
|
"loss": 0.0538, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 23.609115171422705, |
|
"eval_cer": 0.01940165876777251, |
|
"eval_loss": 0.03247096389532089, |
|
"eval_runtime": 22.3142, |
|
"eval_samples_per_second": 4.033, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 23.711763498254978, |
|
"grad_norm": 0.305859237909317, |
|
"learning_rate": 0.00026342316395803334, |
|
"loss": 0.0534, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 23.711763498254978, |
|
"eval_cer": 0.017180094786729858, |
|
"eval_loss": 0.03188026696443558, |
|
"eval_runtime": 22.28, |
|
"eval_samples_per_second": 4.039, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 23.81441182508725, |
|
"grad_norm": 0.414869099855423, |
|
"learning_rate": 0.00026239456901872043, |
|
"loss": 0.0534, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 23.81441182508725, |
|
"eval_cer": 0.01940165876777251, |
|
"eval_loss": 0.033250004053115845, |
|
"eval_runtime": 22.3878, |
|
"eval_samples_per_second": 4.02, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06613047363717604, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 23.917060151919525, |
|
"grad_norm": 0.24189621210098267, |
|
"learning_rate": 0.0002613659740794075, |
|
"loss": 0.0537, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 23.917060151919525, |
|
"eval_cer": 0.018216824644549764, |
|
"eval_loss": 0.033227771520614624, |
|
"eval_runtime": 22.1167, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 24.019708478751795, |
|
"grad_norm": 0.19833236932754517, |
|
"learning_rate": 0.00026033737914009467, |
|
"loss": 0.0533, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 24.019708478751795, |
|
"eval_cer": 0.017920616113744077, |
|
"eval_loss": 0.03290673345327377, |
|
"eval_runtime": 22.278, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 24.12235680558407, |
|
"grad_norm": 0.15947362780570984, |
|
"learning_rate": 0.00025930878420078176, |
|
"loss": 0.0523, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 24.12235680558407, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03324710577726364, |
|
"eval_runtime": 22.2183, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 24.225005132416342, |
|
"grad_norm": 0.19958557188510895, |
|
"learning_rate": 0.00025828018926146886, |
|
"loss": 0.0522, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 24.225005132416342, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.031826525926589966, |
|
"eval_runtime": 23.5228, |
|
"eval_samples_per_second": 3.826, |
|
"eval_steps_per_second": 0.043, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 24.327653459248616, |
|
"grad_norm": 0.20118238031864166, |
|
"learning_rate": 0.00025725159432215595, |
|
"loss": 0.0518, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 24.327653459248616, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.03189582750201225, |
|
"eval_runtime": 22.1576, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 24.430301786080886, |
|
"grad_norm": 0.2729335129261017, |
|
"learning_rate": 0.00025622299938284304, |
|
"loss": 0.0529, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 24.430301786080886, |
|
"eval_cer": 0.017180094786729858, |
|
"eval_loss": 0.03211754932999611, |
|
"eval_runtime": 22.2144, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 24.53295011291316, |
|
"grad_norm": 0.18871107697486877, |
|
"learning_rate": 0.00025519440444353014, |
|
"loss": 0.0531, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 24.53295011291316, |
|
"eval_cer": 0.01806872037914692, |
|
"eval_loss": 0.031803932040929794, |
|
"eval_runtime": 22.1636, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 24.635598439745433, |
|
"grad_norm": 0.22552721202373505, |
|
"learning_rate": 0.00025416580950421723, |
|
"loss": 0.053, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 24.635598439745433, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.03302132338285446, |
|
"eval_runtime": 22.305, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 24.738246766577706, |
|
"grad_norm": 0.3143594264984131, |
|
"learning_rate": 0.0002531372145649044, |
|
"loss": 0.0535, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 24.738246766577706, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.032325536012649536, |
|
"eval_runtime": 22.2087, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 24.840895093409976, |
|
"grad_norm": 0.2053222805261612, |
|
"learning_rate": 0.00025210861962559147, |
|
"loss": 0.053, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 24.840895093409976, |
|
"eval_cer": 0.01851303317535545, |
|
"eval_loss": 0.03247794508934021, |
|
"eval_runtime": 22.8241, |
|
"eval_samples_per_second": 3.943, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 24.94354342024225, |
|
"grad_norm": 0.2229388952255249, |
|
"learning_rate": 0.00025108002468627856, |
|
"loss": 0.0525, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 24.94354342024225, |
|
"eval_cer": 0.020290284360189575, |
|
"eval_loss": 0.032272905111312866, |
|
"eval_runtime": 22.7477, |
|
"eval_samples_per_second": 3.956, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.06702412868632708, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 25.046191747074523, |
|
"grad_norm": 0.21171027421951294, |
|
"learning_rate": 0.00025005142974696566, |
|
"loss": 0.052, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 25.046191747074523, |
|
"eval_cer": 0.018809241706161137, |
|
"eval_loss": 0.03205866739153862, |
|
"eval_runtime": 22.1495, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 25.148840073906797, |
|
"grad_norm": 0.28519150614738464, |
|
"learning_rate": 0.00024902283480765275, |
|
"loss": 0.0518, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 25.148840073906797, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.0315067283809185, |
|
"eval_runtime": 22.1659, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 25.251488400739067, |
|
"grad_norm": 0.19159762561321259, |
|
"learning_rate": 0.00024799423986833984, |
|
"loss": 0.0519, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 25.251488400739067, |
|
"eval_cer": 0.019105450236966824, |
|
"eval_loss": 0.03155896067619324, |
|
"eval_runtime": 22.1685, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06523681858802502, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 25.35413672757134, |
|
"grad_norm": 0.2302646040916443, |
|
"learning_rate": 0.00024696564492902694, |
|
"loss": 0.0521, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 25.35413672757134, |
|
"eval_cer": 0.018364928909952605, |
|
"eval_loss": 0.032100409269332886, |
|
"eval_runtime": 22.1736, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 25.456785054403614, |
|
"grad_norm": 0.2624431848526001, |
|
"learning_rate": 0.0002459370499897141, |
|
"loss": 0.0525, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 25.456785054403614, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.03178829327225685, |
|
"eval_runtime": 22.2239, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 25.559433381235884, |
|
"grad_norm": 0.24456505477428436, |
|
"learning_rate": 0.0002449084550504012, |
|
"loss": 0.052, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 25.559433381235884, |
|
"eval_cer": 0.01851303317535545, |
|
"eval_loss": 0.030579831451177597, |
|
"eval_runtime": 22.261, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 25.662081708068158, |
|
"grad_norm": 0.1688804030418396, |
|
"learning_rate": 0.00024387986011108827, |
|
"loss": 0.0518, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 25.662081708068158, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.033347100019454956, |
|
"eval_runtime": 22.153, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 25.76473003490043, |
|
"grad_norm": 0.1677083969116211, |
|
"learning_rate": 0.00024285126517177536, |
|
"loss": 0.0524, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 25.76473003490043, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.031150901690125465, |
|
"eval_runtime": 22.137, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 25.867378361732705, |
|
"grad_norm": 0.28190451860427856, |
|
"learning_rate": 0.00024182267023246246, |
|
"loss": 0.0525, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 25.867378361732705, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.030852019786834717, |
|
"eval_runtime": 22.2042, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 25.970026688564978, |
|
"grad_norm": 0.30484601855278015, |
|
"learning_rate": 0.00024079407529314955, |
|
"loss": 0.0525, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 25.970026688564978, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.03129878640174866, |
|
"eval_runtime": 22.1837, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 26.072675015397248, |
|
"grad_norm": 0.23301398754119873, |
|
"learning_rate": 0.00023976548035383667, |
|
"loss": 0.0511, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 26.072675015397248, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.03187458962202072, |
|
"eval_runtime": 22.2688, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 26.17532334222952, |
|
"grad_norm": 0.2110058218240738, |
|
"learning_rate": 0.00023873688541452376, |
|
"loss": 0.051, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 26.17532334222952, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.03192685917019844, |
|
"eval_runtime": 22.1497, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 26.277971669061795, |
|
"grad_norm": 0.7681686282157898, |
|
"learning_rate": 0.00023770829047521086, |
|
"loss": 0.0512, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 26.277971669061795, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.03258216753602028, |
|
"eval_runtime": 22.2138, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.064343163538874, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 26.380619995894065, |
|
"grad_norm": 0.20167267322540283, |
|
"learning_rate": 0.00023667969553589798, |
|
"loss": 0.0511, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 26.380619995894065, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.03150052949786186, |
|
"eval_runtime": 22.1067, |
|
"eval_samples_per_second": 4.071, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 26.48326832272634, |
|
"grad_norm": 0.2660065293312073, |
|
"learning_rate": 0.00023565110059658507, |
|
"loss": 0.0514, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 26.48326832272634, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.031788378953933716, |
|
"eval_runtime": 22.0549, |
|
"eval_samples_per_second": 4.081, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 26.585916649558612, |
|
"grad_norm": 0.18703380227088928, |
|
"learning_rate": 0.00023462250565727216, |
|
"loss": 0.0516, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 26.585916649558612, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.03131980076432228, |
|
"eval_runtime": 22.1633, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 26.688564976390886, |
|
"grad_norm": 0.25654709339141846, |
|
"learning_rate": 0.00023359391071795926, |
|
"loss": 0.0517, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 26.688564976390886, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.031445086002349854, |
|
"eval_runtime": 22.3924, |
|
"eval_samples_per_second": 4.019, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 26.791213303223156, |
|
"grad_norm": 0.2620410919189453, |
|
"learning_rate": 0.00023256531577864638, |
|
"loss": 0.052, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 26.791213303223156, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.03067016415297985, |
|
"eval_runtime": 22.1951, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 26.89386163005543, |
|
"grad_norm": 0.2175012230873108, |
|
"learning_rate": 0.00023153672083933347, |
|
"loss": 0.0519, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 26.89386163005543, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.031155884265899658, |
|
"eval_runtime": 22.1754, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 26.996509956887703, |
|
"grad_norm": 0.22425027191638947, |
|
"learning_rate": 0.00023050812590002056, |
|
"loss": 0.052, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 26.996509956887703, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.030132591724395752, |
|
"eval_runtime": 22.1695, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 27.099158283719976, |
|
"grad_norm": 0.24267776310443878, |
|
"learning_rate": 0.00022947953096070768, |
|
"loss": 0.0506, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 27.099158283719976, |
|
"eval_cer": 0.0173281990521327, |
|
"eval_loss": 0.030183136463165283, |
|
"eval_runtime": 22.1929, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 27.201806610552246, |
|
"grad_norm": 0.22535988688468933, |
|
"learning_rate": 0.00022845093602139478, |
|
"loss": 0.051, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 27.201806610552246, |
|
"eval_cer": 0.015550947867298577, |
|
"eval_loss": 0.029872052371501923, |
|
"eval_runtime": 21.6648, |
|
"eval_samples_per_second": 4.154, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 27.30445493738452, |
|
"grad_norm": 0.2646799385547638, |
|
"learning_rate": 0.00022742234108208187, |
|
"loss": 0.0511, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 27.30445493738452, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.030452899634838104, |
|
"eval_runtime": 21.9979, |
|
"eval_samples_per_second": 4.091, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 27.407103264216794, |
|
"grad_norm": 0.1818641871213913, |
|
"learning_rate": 0.00022639374614276896, |
|
"loss": 0.0507, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 27.407103264216794, |
|
"eval_cer": 0.01806872037914692, |
|
"eval_loss": 0.030228691175580025, |
|
"eval_runtime": 22.1513, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 27.509751591049067, |
|
"grad_norm": 0.2914126515388489, |
|
"learning_rate": 0.00022536515120345608, |
|
"loss": 0.0506, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 27.509751591049067, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.030619405210018158, |
|
"eval_runtime": 22.1314, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06344950848972297, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 27.612399917881337, |
|
"grad_norm": 0.19746644794940948, |
|
"learning_rate": 0.00022433655626414317, |
|
"loss": 0.0508, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 27.612399917881337, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.029848448932170868, |
|
"eval_runtime": 22.0615, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 27.71504824471361, |
|
"grad_norm": 0.171453595161438, |
|
"learning_rate": 0.00022330796132483027, |
|
"loss": 0.0506, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 27.71504824471361, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.029385404661297798, |
|
"eval_runtime": 22.1939, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 27.817696571545884, |
|
"grad_norm": 0.20320715010166168, |
|
"learning_rate": 0.0002222793663855174, |
|
"loss": 0.0513, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 27.817696571545884, |
|
"eval_cer": 0.016735781990521326, |
|
"eval_loss": 0.03053821623325348, |
|
"eval_runtime": 22.1393, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 27.920344898378158, |
|
"grad_norm": 0.18147552013397217, |
|
"learning_rate": 0.00022125077144620448, |
|
"loss": 0.051, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 27.920344898378158, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.030883438885211945, |
|
"eval_runtime": 22.1075, |
|
"eval_samples_per_second": 4.071, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 28.022993225210428, |
|
"grad_norm": 0.3934711813926697, |
|
"learning_rate": 0.00022022217650689157, |
|
"loss": 0.0505, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 28.022993225210428, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.030014000833034515, |
|
"eval_runtime": 21.9105, |
|
"eval_samples_per_second": 4.108, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 28.1256415520427, |
|
"grad_norm": 0.18721525371074677, |
|
"learning_rate": 0.0002191935815675787, |
|
"loss": 0.0495, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 28.1256415520427, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.029803840443491936, |
|
"eval_runtime": 22.2218, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 28.228289878874975, |
|
"grad_norm": 0.2836057245731354, |
|
"learning_rate": 0.0002181649866282658, |
|
"loss": 0.0499, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 28.228289878874975, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.02985943853855133, |
|
"eval_runtime": 22.1574, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 28.33093820570725, |
|
"grad_norm": 0.16741269826889038, |
|
"learning_rate": 0.00021713639168895288, |
|
"loss": 0.0503, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 28.33093820570725, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.02978348545730114, |
|
"eval_runtime": 22.2061, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.058981233243967826, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 28.43358653253952, |
|
"grad_norm": 0.5261670351028442, |
|
"learning_rate": 0.00021610779674963997, |
|
"loss": 0.0505, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 28.43358653253952, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.02990272268652916, |
|
"eval_runtime": 22.1117, |
|
"eval_samples_per_second": 4.07, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 28.536234859371792, |
|
"grad_norm": 0.18356911838054657, |
|
"learning_rate": 0.0002150792018103271, |
|
"loss": 0.0502, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 28.536234859371792, |
|
"eval_cer": 0.017476303317535545, |
|
"eval_loss": 0.0308319590985775, |
|
"eval_runtime": 22.2031, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 28.638883186204065, |
|
"grad_norm": 0.2313164621591568, |
|
"learning_rate": 0.0002140506068710142, |
|
"loss": 0.0506, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 28.638883186204065, |
|
"eval_cer": 0.016735781990521326, |
|
"eval_loss": 0.03133101388812065, |
|
"eval_runtime": 22.2245, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06255585344057193, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 28.74153151303634, |
|
"grad_norm": 0.23044399917125702, |
|
"learning_rate": 0.00021302201193170128, |
|
"loss": 0.0511, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 28.74153151303634, |
|
"eval_cer": 0.01762440758293839, |
|
"eval_loss": 0.0300216656178236, |
|
"eval_runtime": 22.3006, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 28.84417983986861, |
|
"grad_norm": 0.35165879130363464, |
|
"learning_rate": 0.0002119934169923884, |
|
"loss": 0.051, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 28.84417983986861, |
|
"eval_cer": 0.0173281990521327, |
|
"eval_loss": 0.029326628893613815, |
|
"eval_runtime": 22.3265, |
|
"eval_samples_per_second": 4.031, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 28.946828166700882, |
|
"grad_norm": 0.276239275932312, |
|
"learning_rate": 0.00021096482205307552, |
|
"loss": 0.0499, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 28.946828166700882, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.02999110147356987, |
|
"eval_runtime": 22.3215, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 29.049476493533156, |
|
"grad_norm": 0.2091301679611206, |
|
"learning_rate": 0.00020993622711376261, |
|
"loss": 0.0494, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 29.049476493533156, |
|
"eval_cer": 0.016735781990521326, |
|
"eval_loss": 0.029503343626856804, |
|
"eval_runtime": 22.2699, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 29.15212482036543, |
|
"grad_norm": 0.3076883852481842, |
|
"learning_rate": 0.0002089076321744497, |
|
"loss": 0.0493, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 29.15212482036543, |
|
"eval_cer": 0.01806872037914692, |
|
"eval_loss": 0.02992323227226734, |
|
"eval_runtime": 22.4692, |
|
"eval_samples_per_second": 4.005, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 29.2547731471977, |
|
"grad_norm": 0.2164120078086853, |
|
"learning_rate": 0.00020787903723513683, |
|
"loss": 0.0492, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 29.2547731471977, |
|
"eval_cer": 0.017772511848341232, |
|
"eval_loss": 0.029783058911561966, |
|
"eval_runtime": 22.2084, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 29.357421474029973, |
|
"grad_norm": 0.25016504526138306, |
|
"learning_rate": 0.00020685044229582392, |
|
"loss": 0.0497, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 29.357421474029973, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.029911022633314133, |
|
"eval_runtime": 22.0994, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05987488829311886, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 29.460069800862247, |
|
"grad_norm": 0.2327207326889038, |
|
"learning_rate": 0.00020582184735651101, |
|
"loss": 0.0496, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 29.460069800862247, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.028591720387339592, |
|
"eval_runtime": 22.1807, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 29.56271812769452, |
|
"grad_norm": 0.25889137387275696, |
|
"learning_rate": 0.00020479325241719813, |
|
"loss": 0.0497, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 29.56271812769452, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.02914293482899666, |
|
"eval_runtime": 22.2487, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 29.66536645452679, |
|
"grad_norm": 0.3326428532600403, |
|
"learning_rate": 0.00020376465747788523, |
|
"loss": 0.0499, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 29.66536645452679, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.028966935351490974, |
|
"eval_runtime": 22.2836, |
|
"eval_samples_per_second": 4.039, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 29.768014781359064, |
|
"grad_norm": 0.2747463583946228, |
|
"learning_rate": 0.00020273606253857232, |
|
"loss": 0.0498, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 29.768014781359064, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.027468033134937286, |
|
"eval_runtime": 22.3116, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 29.870663108191337, |
|
"grad_norm": 0.31372368335723877, |
|
"learning_rate": 0.00020170746759925941, |
|
"loss": 0.0495, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 29.870663108191337, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.029326878488063812, |
|
"eval_runtime": 22.3423, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 29.97331143502361, |
|
"grad_norm": 0.18704882264137268, |
|
"learning_rate": 0.00020067887265994653, |
|
"loss": 0.0499, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 29.97331143502361, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.028479211032390594, |
|
"eval_runtime": 22.4625, |
|
"eval_samples_per_second": 4.007, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 30.07595976185588, |
|
"grad_norm": 0.1708535999059677, |
|
"learning_rate": 0.00019965027772063363, |
|
"loss": 0.0484, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 30.07595976185588, |
|
"eval_cer": 0.013921800947867298, |
|
"eval_loss": 0.028360920026898384, |
|
"eval_runtime": 22.3021, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 30.178608088688154, |
|
"grad_norm": 0.24666380882263184, |
|
"learning_rate": 0.00019862168278132072, |
|
"loss": 0.0489, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 30.178608088688154, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.028621409088373184, |
|
"eval_runtime": 22.2968, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 30.281256415520428, |
|
"grad_norm": 0.2784259021282196, |
|
"learning_rate": 0.00019759308784200784, |
|
"loss": 0.0493, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 30.281256415520428, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.028690271079540253, |
|
"eval_runtime": 22.252, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 30.3839047423527, |
|
"grad_norm": 0.2628447711467743, |
|
"learning_rate": 0.00019656449290269493, |
|
"loss": 0.049, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 30.3839047423527, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.028396843001246452, |
|
"eval_runtime": 22.2552, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 30.48655306918497, |
|
"grad_norm": 0.23084846138954163, |
|
"learning_rate": 0.00019553589796338203, |
|
"loss": 0.049, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 30.48655306918497, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.02870321460068226, |
|
"eval_runtime": 22.2205, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 30.589201396017245, |
|
"grad_norm": 0.17921754717826843, |
|
"learning_rate": 0.00019450730302406912, |
|
"loss": 0.0491, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 30.589201396017245, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.028362760320305824, |
|
"eval_runtime": 22.2536, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 30.69184972284952, |
|
"grad_norm": 0.20431461930274963, |
|
"learning_rate": 0.00019347870808475624, |
|
"loss": 0.0493, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 30.69184972284952, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.027963554486632347, |
|
"eval_runtime": 22.2764, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 30.794498049681792, |
|
"grad_norm": 0.3255954384803772, |
|
"learning_rate": 0.00019245011314544333, |
|
"loss": 0.049, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 30.794498049681792, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.028848888352513313, |
|
"eval_runtime": 22.3367, |
|
"eval_samples_per_second": 4.029, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 30.897146376514062, |
|
"grad_norm": 0.30805402994155884, |
|
"learning_rate": 0.00019142151820613043, |
|
"loss": 0.0493, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 30.897146376514062, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.029449112713336945, |
|
"eval_runtime": 22.2797, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 30.999794703346335, |
|
"grad_norm": 0.2879369854927063, |
|
"learning_rate": 0.00019039292326681755, |
|
"loss": 0.0491, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 30.999794703346335, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.028681093826889992, |
|
"eval_runtime": 22.3072, |
|
"eval_samples_per_second": 4.035, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 31.10244303017861, |
|
"grad_norm": 0.30376702547073364, |
|
"learning_rate": 0.00018936432832750464, |
|
"loss": 0.0479, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 31.10244303017861, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.028124256059527397, |
|
"eval_runtime": 22.255, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 31.20509135701088, |
|
"grad_norm": 0.325859934091568, |
|
"learning_rate": 0.00018833573338819173, |
|
"loss": 0.0485, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 31.20509135701088, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.028593741357326508, |
|
"eval_runtime": 22.3606, |
|
"eval_samples_per_second": 4.025, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 31.307739683843153, |
|
"grad_norm": 0.26860424876213074, |
|
"learning_rate": 0.00018730713844887883, |
|
"loss": 0.0481, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 31.307739683843153, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.028675682842731476, |
|
"eval_runtime": 22.1875, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 31.410388010675426, |
|
"grad_norm": 0.36149585247039795, |
|
"learning_rate": 0.00018627854350956595, |
|
"loss": 0.0488, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 31.410388010675426, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.029292631894350052, |
|
"eval_runtime": 22.1759, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 31.5130363375077, |
|
"grad_norm": 0.23009520769119263, |
|
"learning_rate": 0.00018524994857025304, |
|
"loss": 0.0487, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 31.5130363375077, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.02981569990515709, |
|
"eval_runtime": 22.1972, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 31.61568466433997, |
|
"grad_norm": 0.2854170799255371, |
|
"learning_rate": 0.00018422135363094013, |
|
"loss": 0.0485, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 31.61568466433997, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.030133897438645363, |
|
"eval_runtime": 22.3301, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 31.718332991172243, |
|
"grad_norm": 0.20701636373996735, |
|
"learning_rate": 0.00018319275869162725, |
|
"loss": 0.0488, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 31.718332991172243, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.02945251390337944, |
|
"eval_runtime": 22.1144, |
|
"eval_samples_per_second": 4.07, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 31.820981318004517, |
|
"grad_norm": 0.16278359293937683, |
|
"learning_rate": 0.00018216416375231435, |
|
"loss": 0.0489, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 31.820981318004517, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.02939271740615368, |
|
"eval_runtime": 22.2354, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 31.92362964483679, |
|
"grad_norm": 0.20761480927467346, |
|
"learning_rate": 0.00018113556881300144, |
|
"loss": 0.049, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 31.92362964483679, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.028488388285040855, |
|
"eval_runtime": 22.1425, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 32.02627797166906, |
|
"grad_norm": 0.23206296563148499, |
|
"learning_rate": 0.00018010697387368853, |
|
"loss": 0.0487, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 32.02627797166906, |
|
"eval_cer": 0.017180094786729858, |
|
"eval_loss": 0.028869740664958954, |
|
"eval_runtime": 22.3356, |
|
"eval_samples_per_second": 4.029, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 32.128926298501334, |
|
"grad_norm": 0.3341107964515686, |
|
"learning_rate": 0.00017907837893437565, |
|
"loss": 0.0481, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 32.128926298501334, |
|
"eval_cer": 0.018216824644549764, |
|
"eval_loss": 0.029275845736265182, |
|
"eval_runtime": 22.1486, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06166219839142091, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 32.23157462533361, |
|
"grad_norm": 0.240467831492424, |
|
"learning_rate": 0.00017804978399506275, |
|
"loss": 0.0481, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 32.23157462533361, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.02915882132947445, |
|
"eval_runtime": 22.1429, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 32.33422295216588, |
|
"grad_norm": 0.24573862552642822, |
|
"learning_rate": 0.00017702118905574984, |
|
"loss": 0.0473, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 32.33422295216588, |
|
"eval_cer": 0.018364928909952605, |
|
"eval_loss": 0.02838301472365856, |
|
"eval_runtime": 22.1497, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 32.436871278998154, |
|
"grad_norm": 0.3885030746459961, |
|
"learning_rate": 0.00017599259411643696, |
|
"loss": 0.0486, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 32.436871278998154, |
|
"eval_cer": 0.01762440758293839, |
|
"eval_loss": 0.02944045141339302, |
|
"eval_runtime": 22.1602, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 32.53951960583043, |
|
"grad_norm": 0.31944355368614197, |
|
"learning_rate": 0.00017496399917712405, |
|
"loss": 0.0483, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 32.53951960583043, |
|
"eval_cer": 0.0173281990521327, |
|
"eval_loss": 0.028936417773365974, |
|
"eval_runtime": 21.9567, |
|
"eval_samples_per_second": 4.099, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 32.642167932662694, |
|
"grad_norm": 0.2156781703233719, |
|
"learning_rate": 0.00017393540423781115, |
|
"loss": 0.0477, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 32.642167932662694, |
|
"eval_cer": 0.018661137440758292, |
|
"eval_loss": 0.029529759660363197, |
|
"eval_runtime": 22.0507, |
|
"eval_samples_per_second": 4.082, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.06076854334226988, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 32.74481625949497, |
|
"grad_norm": 0.23771491646766663, |
|
"learning_rate": 0.00017290680929849824, |
|
"loss": 0.048, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 32.74481625949497, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.029350074008107185, |
|
"eval_runtime": 22.1715, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 32.84746458632724, |
|
"grad_norm": 0.4010453224182129, |
|
"learning_rate": 0.00017187821435918536, |
|
"loss": 0.0482, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 32.84746458632724, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.0294723492115736, |
|
"eval_runtime": 22.2642, |
|
"eval_samples_per_second": 4.042, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 32.950112913159515, |
|
"grad_norm": 0.21573138236999512, |
|
"learning_rate": 0.00017084961941987245, |
|
"loss": 0.0478, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 32.950112913159515, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.02849040925502777, |
|
"eval_runtime": 22.3919, |
|
"eval_samples_per_second": 4.019, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 33.05276123999179, |
|
"grad_norm": 0.2565110921859741, |
|
"learning_rate": 0.00016982102448055955, |
|
"loss": 0.0476, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 33.05276123999179, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.028644192963838577, |
|
"eval_runtime": 22.4917, |
|
"eval_samples_per_second": 4.001, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 33.15540956682406, |
|
"grad_norm": 0.17566250264644623, |
|
"learning_rate": 0.00016879242954124667, |
|
"loss": 0.0465, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 33.15540956682406, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.028031960129737854, |
|
"eval_runtime": 22.2193, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 33.258057893656336, |
|
"grad_norm": 0.21690410375595093, |
|
"learning_rate": 0.00016776383460193376, |
|
"loss": 0.0474, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 33.258057893656336, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.028126152232289314, |
|
"eval_runtime": 22.2514, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 33.36070622048861, |
|
"grad_norm": 0.17805682122707367, |
|
"learning_rate": 0.00016673523966262085, |
|
"loss": 0.047, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 33.36070622048861, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.028366120532155037, |
|
"eval_runtime": 22.1824, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 33.463354547320876, |
|
"grad_norm": 0.23276859521865845, |
|
"learning_rate": 0.00016570664472330795, |
|
"loss": 0.0478, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 33.463354547320876, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.02805442176759243, |
|
"eval_runtime": 22.0401, |
|
"eval_samples_per_second": 4.083, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 33.56600287415315, |
|
"grad_norm": 0.23791708052158356, |
|
"learning_rate": 0.00016467804978399507, |
|
"loss": 0.0479, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 33.56600287415315, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.028071463108062744, |
|
"eval_runtime": 22.166, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 33.66865120098542, |
|
"grad_norm": 0.22921526432037354, |
|
"learning_rate": 0.00016364945484468216, |
|
"loss": 0.0482, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 33.66865120098542, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.028569117188453674, |
|
"eval_runtime": 22.3334, |
|
"eval_samples_per_second": 4.03, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 33.771299527817696, |
|
"grad_norm": 0.1470087319612503, |
|
"learning_rate": 0.00016262085990536925, |
|
"loss": 0.0477, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 33.771299527817696, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.028614189475774765, |
|
"eval_runtime": 22.1814, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 33.87394785464997, |
|
"grad_norm": 0.4257276654243469, |
|
"learning_rate": 0.00016159226496605637, |
|
"loss": 0.0483, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 33.87394785464997, |
|
"eval_cer": 0.01643957345971564, |
|
"eval_loss": 0.028485840186476707, |
|
"eval_runtime": 22.174, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 33.97659618148224, |
|
"grad_norm": 0.4129483997821808, |
|
"learning_rate": 0.00016056367002674347, |
|
"loss": 0.0475, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 33.97659618148224, |
|
"eval_cer": 0.01525473933649289, |
|
"eval_loss": 0.028352849185466766, |
|
"eval_runtime": 22.3086, |
|
"eval_samples_per_second": 4.034, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 34.07924450831452, |
|
"grad_norm": 0.2835679054260254, |
|
"learning_rate": 0.00015953507508743056, |
|
"loss": 0.0465, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 34.07924450831452, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.028890669345855713, |
|
"eval_runtime": 22.1774, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 34.18189283514679, |
|
"grad_norm": 0.2247968167066574, |
|
"learning_rate": 0.00015850648014811765, |
|
"loss": 0.0469, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 34.18189283514679, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.028413381427526474, |
|
"eval_runtime": 22.0968, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 34.28454116197906, |
|
"grad_norm": 0.17359092831611633, |
|
"learning_rate": 0.00015747788520880477, |
|
"loss": 0.0468, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 34.28454116197906, |
|
"eval_cer": 0.015550947867298577, |
|
"eval_loss": 0.028406651690602303, |
|
"eval_runtime": 22.1838, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 34.38718948881133, |
|
"grad_norm": 0.2905976176261902, |
|
"learning_rate": 0.00015644929026949187, |
|
"loss": 0.0467, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 34.38718948881133, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.028134917840361595, |
|
"eval_runtime": 22.2384, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 34.489837815643604, |
|
"grad_norm": 0.41442832350730896, |
|
"learning_rate": 0.00015542069533017896, |
|
"loss": 0.0472, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 34.489837815643604, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.028330376371741295, |
|
"eval_runtime": 22.2855, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 34.59248614247588, |
|
"grad_norm": 0.20874008536338806, |
|
"learning_rate": 0.00015439210039086608, |
|
"loss": 0.0472, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 34.59248614247588, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.028587637469172478, |
|
"eval_runtime": 22.2778, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 34.69513446930815, |
|
"grad_norm": 0.28286242485046387, |
|
"learning_rate": 0.00015336350545155317, |
|
"loss": 0.047, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 34.69513446930815, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.028073778375983238, |
|
"eval_runtime": 22.2502, |
|
"eval_samples_per_second": 4.045, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 34.797782796140424, |
|
"grad_norm": 0.2530520558357239, |
|
"learning_rate": 0.00015233491051224027, |
|
"loss": 0.0469, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 34.797782796140424, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.02780935913324356, |
|
"eval_runtime": 22.1698, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 34.9004311229727, |
|
"grad_norm": 0.24278897047042847, |
|
"learning_rate": 0.00015130631557292736, |
|
"loss": 0.0474, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 34.9004311229727, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.027951352298259735, |
|
"eval_runtime": 22.2193, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 35.00307944980497, |
|
"grad_norm": 0.23415499925613403, |
|
"learning_rate": 0.00015027772063361448, |
|
"loss": 0.0469, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 35.00307944980497, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.02772090956568718, |
|
"eval_runtime": 22.09, |
|
"eval_samples_per_second": 4.074, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05719392314566577, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 35.10572777663724, |
|
"grad_norm": 0.4553733468055725, |
|
"learning_rate": 0.00014924912569430157, |
|
"loss": 0.0458, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 35.10572777663724, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.027903633192181587, |
|
"eval_runtime": 22.1864, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 35.20837610346951, |
|
"grad_norm": 0.1911894530057907, |
|
"learning_rate": 0.00014822053075498867, |
|
"loss": 0.0464, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 35.20837610346951, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.0284242145717144, |
|
"eval_runtime": 22.2316, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 35.311024430301785, |
|
"grad_norm": 0.9561129212379456, |
|
"learning_rate": 0.0001471919358156758, |
|
"loss": 0.0463, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 35.311024430301785, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.02789863385260105, |
|
"eval_runtime": 22.2194, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 35.41367275713406, |
|
"grad_norm": 0.2921608090400696, |
|
"learning_rate": 0.0001461633408763629, |
|
"loss": 0.0461, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 35.41367275713406, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.028261249884963036, |
|
"eval_runtime": 22.0199, |
|
"eval_samples_per_second": 4.087, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 35.51632108396633, |
|
"grad_norm": 0.19372744858264923, |
|
"learning_rate": 0.00014513474593705, |
|
"loss": 0.0463, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 35.51632108396633, |
|
"eval_cer": 0.01688388625592417, |
|
"eval_loss": 0.028103064745664597, |
|
"eval_runtime": 22.1334, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.0580875781948168, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 35.618969410798606, |
|
"grad_norm": 0.27431151270866394, |
|
"learning_rate": 0.00014410615099773712, |
|
"loss": 0.0472, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 35.618969410798606, |
|
"eval_cer": 0.016587677725118485, |
|
"eval_loss": 0.028425684198737144, |
|
"eval_runtime": 22.139, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05630026809651475, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 35.72161773763088, |
|
"grad_norm": 0.27128133177757263, |
|
"learning_rate": 0.0001430775560584242, |
|
"loss": 0.0466, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 35.72161773763088, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.02764798142015934, |
|
"eval_runtime": 22.2733, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05451295799821269, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 35.824266064463146, |
|
"grad_norm": 0.26995572447776794, |
|
"learning_rate": 0.0001420489611191113, |
|
"loss": 0.0467, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 35.824266064463146, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.027596063911914825, |
|
"eval_runtime": 22.0467, |
|
"eval_samples_per_second": 4.082, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 35.92691439129542, |
|
"grad_norm": 0.31239375472068787, |
|
"learning_rate": 0.0001410203661797984, |
|
"loss": 0.0469, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 35.92691439129542, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.028232304379343987, |
|
"eval_runtime": 22.1495, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 36.02956271812769, |
|
"grad_norm": 0.344926655292511, |
|
"learning_rate": 0.00013999177124048552, |
|
"loss": 0.0464, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 36.02956271812769, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.026858482509851456, |
|
"eval_runtime": 22.2106, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 36.132211044959966, |
|
"grad_norm": 0.1864170879125595, |
|
"learning_rate": 0.0001389631763011726, |
|
"loss": 0.0449, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 36.132211044959966, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.02711603231728077, |
|
"eval_runtime": 22.2552, |
|
"eval_samples_per_second": 4.044, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 36.23485937179224, |
|
"grad_norm": 0.1876976191997528, |
|
"learning_rate": 0.0001379345813618597, |
|
"loss": 0.0466, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 36.23485937179224, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.027281379327178, |
|
"eval_runtime": 22.2442, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 36.33750769862451, |
|
"grad_norm": 0.21546737849712372, |
|
"learning_rate": 0.00013690598642254683, |
|
"loss": 0.0458, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 36.33750769862451, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.02689436264336109, |
|
"eval_runtime": 22.259, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 36.44015602545679, |
|
"grad_norm": 0.31887394189834595, |
|
"learning_rate": 0.00013587739148323392, |
|
"loss": 0.0459, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 36.44015602545679, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.027051741257309914, |
|
"eval_runtime": 22.2336, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 36.54280435228906, |
|
"grad_norm": 0.24578991532325745, |
|
"learning_rate": 0.000134848796543921, |
|
"loss": 0.0469, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 36.54280435228906, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.026666434481739998, |
|
"eval_runtime": 22.215, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 36.64545267912133, |
|
"grad_norm": 0.3394581377506256, |
|
"learning_rate": 0.0001338202016046081, |
|
"loss": 0.0458, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 36.64545267912133, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.02698771469295025, |
|
"eval_runtime": 22.1744, |
|
"eval_samples_per_second": 4.059, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 36.7481010059536, |
|
"grad_norm": 0.2731720805168152, |
|
"learning_rate": 0.00013279160666529523, |
|
"loss": 0.0462, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 36.7481010059536, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.02696968987584114, |
|
"eval_runtime": 22.1592, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 36.850749332785874, |
|
"grad_norm": 0.2410440891981125, |
|
"learning_rate": 0.00013176301172598232, |
|
"loss": 0.0466, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 36.850749332785874, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.027307961136102676, |
|
"eval_runtime": 22.243, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 36.95339765961815, |
|
"grad_norm": 0.28017568588256836, |
|
"learning_rate": 0.0001307344167866694, |
|
"loss": 0.046, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 36.95339765961815, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.02654258720576763, |
|
"eval_runtime": 22.4072, |
|
"eval_samples_per_second": 4.017, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 37.05604598645042, |
|
"grad_norm": 0.23387791216373444, |
|
"learning_rate": 0.00012970582184735653, |
|
"loss": 0.0456, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 37.05604598645042, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.026880960911512375, |
|
"eval_runtime": 22.1133, |
|
"eval_samples_per_second": 4.07, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 37.158694313282695, |
|
"grad_norm": 0.3157537579536438, |
|
"learning_rate": 0.00012867722690804363, |
|
"loss": 0.0452, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 37.158694313282695, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.026554275304079056, |
|
"eval_runtime": 22.2184, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 37.26134264011497, |
|
"grad_norm": 0.301200807094574, |
|
"learning_rate": 0.00012764863196873072, |
|
"loss": 0.0456, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 37.26134264011497, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.02748226933181286, |
|
"eval_runtime": 22.1594, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 37.36399096694724, |
|
"grad_norm": 0.1987874060869217, |
|
"learning_rate": 0.0001266200370294178, |
|
"loss": 0.0454, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 37.36399096694724, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.02689102478325367, |
|
"eval_runtime": 22.289, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 37.46663929377951, |
|
"grad_norm": 0.2465968132019043, |
|
"learning_rate": 0.00012559144209010493, |
|
"loss": 0.0453, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 37.46663929377951, |
|
"eval_cer": 0.012588862559241706, |
|
"eval_loss": 0.026751089841127396, |
|
"eval_runtime": 21.9569, |
|
"eval_samples_per_second": 4.099, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 37.56928762061178, |
|
"grad_norm": 0.39610666036605835, |
|
"learning_rate": 0.00012456284715079203, |
|
"loss": 0.046, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 37.56928762061178, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.026830825954675674, |
|
"eval_runtime": 22.2783, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 37.671935947444055, |
|
"grad_norm": 0.26581278443336487, |
|
"learning_rate": 0.00012353425221147912, |
|
"loss": 0.046, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 37.671935947444055, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.0267048142850399, |
|
"eval_runtime": 22.1983, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 37.77458427427633, |
|
"grad_norm": 0.22986672818660736, |
|
"learning_rate": 0.00012250565727216624, |
|
"loss": 0.0459, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 37.77458427427633, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.026945000514388084, |
|
"eval_runtime": 22.1927, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 37.8772326011086, |
|
"grad_norm": 0.2647237479686737, |
|
"learning_rate": 0.00012147706233285333, |
|
"loss": 0.0454, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 37.8772326011086, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.027104683220386505, |
|
"eval_runtime": 22.1006, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 37.979880927940876, |
|
"grad_norm": 0.2698921263217926, |
|
"learning_rate": 0.00012044846739354043, |
|
"loss": 0.0459, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 37.979880927940876, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.027364252135157585, |
|
"eval_runtime": 22.1262, |
|
"eval_samples_per_second": 4.068, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 38.08252925477315, |
|
"grad_norm": 0.2091304063796997, |
|
"learning_rate": 0.00011941987245422753, |
|
"loss": 0.0453, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 38.08252925477315, |
|
"eval_cer": 0.013329383886255925, |
|
"eval_loss": 0.026948757469654083, |
|
"eval_runtime": 22.0181, |
|
"eval_samples_per_second": 4.088, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 38.18517758160542, |
|
"grad_norm": 0.21031425893306732, |
|
"learning_rate": 0.00011839127751491462, |
|
"loss": 0.0446, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 38.18517758160542, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.027428090572357178, |
|
"eval_runtime": 22.1193, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 38.28782590843769, |
|
"grad_norm": 0.16268426179885864, |
|
"learning_rate": 0.00011736268257560173, |
|
"loss": 0.0452, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 38.28782590843769, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.0269022174179554, |
|
"eval_runtime": 22.3714, |
|
"eval_samples_per_second": 4.023, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 38.39047423526996, |
|
"grad_norm": 0.18285077810287476, |
|
"learning_rate": 0.00011633408763628884, |
|
"loss": 0.0456, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 38.39047423526996, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.026672353968024254, |
|
"eval_runtime": 22.4441, |
|
"eval_samples_per_second": 4.01, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 38.493122562102236, |
|
"grad_norm": 0.14922891557216644, |
|
"learning_rate": 0.00011530549269697593, |
|
"loss": 0.0449, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 38.493122562102236, |
|
"eval_cer": 0.01525473933649289, |
|
"eval_loss": 0.026156587526202202, |
|
"eval_runtime": 22.2935, |
|
"eval_samples_per_second": 4.037, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 38.59577088893451, |
|
"grad_norm": 0.2601664662361145, |
|
"learning_rate": 0.00011427689775766304, |
|
"loss": 0.0453, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 38.59577088893451, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.026526469737291336, |
|
"eval_runtime": 22.4581, |
|
"eval_samples_per_second": 4.007, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 38.69841921576678, |
|
"grad_norm": 0.23301970958709717, |
|
"learning_rate": 0.00011324830281835013, |
|
"loss": 0.0454, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 38.69841921576678, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.026089007034897804, |
|
"eval_runtime": 22.0613, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 38.80106754259906, |
|
"grad_norm": 0.1848640739917755, |
|
"learning_rate": 0.00011221970787903724, |
|
"loss": 0.0455, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 38.80106754259906, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.02705644629895687, |
|
"eval_runtime": 22.0628, |
|
"eval_samples_per_second": 4.079, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 38.90371586943133, |
|
"grad_norm": 0.2265135794878006, |
|
"learning_rate": 0.00011119111293972433, |
|
"loss": 0.0449, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 38.90371586943133, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.026829397305846214, |
|
"eval_runtime": 22.1974, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 39.006364196263604, |
|
"grad_norm": 0.18336538970470428, |
|
"learning_rate": 0.00011016251800041144, |
|
"loss": 0.045, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 39.006364196263604, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.02715076506137848, |
|
"eval_runtime": 22.2325, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 39.10901252309587, |
|
"grad_norm": 0.2116203010082245, |
|
"learning_rate": 0.00010913392306109854, |
|
"loss": 0.0442, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 39.10901252309587, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.026725102216005325, |
|
"eval_runtime": 22.0106, |
|
"eval_samples_per_second": 4.089, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 39.211660849928144, |
|
"grad_norm": 0.1735929548740387, |
|
"learning_rate": 0.00010810532812178564, |
|
"loss": 0.0446, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 39.211660849928144, |
|
"eval_cer": 0.017031990521327013, |
|
"eval_loss": 0.026632068678736687, |
|
"eval_runtime": 22.2735, |
|
"eval_samples_per_second": 4.041, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 39.31430917676042, |
|
"grad_norm": 0.29244282841682434, |
|
"learning_rate": 0.00010707673318247274, |
|
"loss": 0.0443, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 39.31430917676042, |
|
"eval_cer": 0.01525473933649289, |
|
"eval_loss": 0.02649509161710739, |
|
"eval_runtime": 22.223, |
|
"eval_samples_per_second": 4.05, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 39.41695750359269, |
|
"grad_norm": 0.21645478904247284, |
|
"learning_rate": 0.00010604813824315984, |
|
"loss": 0.0448, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 39.41695750359269, |
|
"eval_cer": 0.015995260663507108, |
|
"eval_loss": 0.02655700594186783, |
|
"eval_runtime": 22.1333, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05540661304736372, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 39.519605830424965, |
|
"grad_norm": 0.24354924261569977, |
|
"learning_rate": 0.00010501954330384694, |
|
"loss": 0.0447, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 39.519605830424965, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.026025088503956795, |
|
"eval_runtime": 22.2171, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 39.62225415725724, |
|
"grad_norm": 0.15328273177146912, |
|
"learning_rate": 0.00010399094836453404, |
|
"loss": 0.0452, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 39.62225415725724, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.025482947006821632, |
|
"eval_runtime": 22.319, |
|
"eval_samples_per_second": 4.032, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 39.72490248408951, |
|
"grad_norm": 0.25354963541030884, |
|
"learning_rate": 0.00010296235342522114, |
|
"loss": 0.0447, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 39.72490248408951, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.026158807799220085, |
|
"eval_runtime": 22.2432, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 39.827550810921785, |
|
"grad_norm": 0.18456153571605682, |
|
"learning_rate": 0.00010193375848590825, |
|
"loss": 0.0452, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 39.827550810921785, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.026585763320326805, |
|
"eval_runtime": 22.3419, |
|
"eval_samples_per_second": 4.028, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 39.93019913775405, |
|
"grad_norm": 1.1289212703704834, |
|
"learning_rate": 0.00010090516354659534, |
|
"loss": 0.0451, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 39.93019913775405, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.026965312659740448, |
|
"eval_runtime": 22.2111, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 40.032847464586325, |
|
"grad_norm": 0.2642553448677063, |
|
"learning_rate": 9.987656860728245e-05, |
|
"loss": 0.0441, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 40.032847464586325, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.02652685157954693, |
|
"eval_runtime": 21.9945, |
|
"eval_samples_per_second": 4.092, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 40.1354957914186, |
|
"grad_norm": 0.2834232449531555, |
|
"learning_rate": 9.884797366796956e-05, |
|
"loss": 0.0441, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 40.1354957914186, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.026538578793406487, |
|
"eval_runtime": 21.6963, |
|
"eval_samples_per_second": 4.148, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 40.23814411825087, |
|
"grad_norm": 0.366251140832901, |
|
"learning_rate": 9.781937872865666e-05, |
|
"loss": 0.044, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 40.23814411825087, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.025871722027659416, |
|
"eval_runtime": 21.9657, |
|
"eval_samples_per_second": 4.097, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 40.340792445083146, |
|
"grad_norm": 0.1964322179555893, |
|
"learning_rate": 9.679078378934376e-05, |
|
"loss": 0.0442, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 40.340792445083146, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.025851983577013016, |
|
"eval_runtime": 21.9412, |
|
"eval_samples_per_second": 4.102, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 40.44344077191542, |
|
"grad_norm": 0.19335012137889862, |
|
"learning_rate": 9.576218885003086e-05, |
|
"loss": 0.0444, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 40.44344077191542, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.025898663327097893, |
|
"eval_runtime": 22.0686, |
|
"eval_samples_per_second": 4.078, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 40.54608909874769, |
|
"grad_norm": 0.2371419221162796, |
|
"learning_rate": 9.473359391071797e-05, |
|
"loss": 0.0448, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 40.54608909874769, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.02598469704389572, |
|
"eval_runtime": 22.2182, |
|
"eval_samples_per_second": 4.051, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 40.64873742557997, |
|
"grad_norm": 0.190704807639122, |
|
"learning_rate": 9.370499897140506e-05, |
|
"loss": 0.0446, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 40.64873742557997, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.025896675884723663, |
|
"eval_runtime": 22.4373, |
|
"eval_samples_per_second": 4.011, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 40.75138575241223, |
|
"grad_norm": 0.24071630835533142, |
|
"learning_rate": 9.267640403209217e-05, |
|
"loss": 0.0445, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 40.75138575241223, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.025766143575310707, |
|
"eval_runtime": 22.1683, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 40.85403407924451, |
|
"grad_norm": 0.37343931198120117, |
|
"learning_rate": 9.164780909277926e-05, |
|
"loss": 0.044, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 40.85403407924451, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.025763213634490967, |
|
"eval_runtime": 22.2275, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 40.95668240607678, |
|
"grad_norm": 0.2885558605194092, |
|
"learning_rate": 9.061921415346637e-05, |
|
"loss": 0.0446, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 40.95668240607678, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.02574954554438591, |
|
"eval_runtime": 22.2623, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 41.05933073290905, |
|
"grad_norm": 0.2904144525527954, |
|
"learning_rate": 8.959061921415348e-05, |
|
"loss": 0.0437, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 41.05933073290905, |
|
"eval_cer": 0.016291469194312798, |
|
"eval_loss": 0.026339180767536163, |
|
"eval_runtime": 22.3982, |
|
"eval_samples_per_second": 4.018, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 41.16197905974133, |
|
"grad_norm": 0.47024524211883545, |
|
"learning_rate": 8.856202427484057e-05, |
|
"loss": 0.0441, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 41.16197905974133, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.026481064036488533, |
|
"eval_runtime": 22.1309, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 41.2646273865736, |
|
"grad_norm": 0.20567986369132996, |
|
"learning_rate": 8.753342933552768e-05, |
|
"loss": 0.0438, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 41.2646273865736, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.026423340663313866, |
|
"eval_runtime": 22.1647, |
|
"eval_samples_per_second": 4.061, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 41.367275713405874, |
|
"grad_norm": 0.25154054164886475, |
|
"learning_rate": 8.650483439621477e-05, |
|
"loss": 0.0439, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 41.367275713405874, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.025870798155665398, |
|
"eval_runtime": 22.2814, |
|
"eval_samples_per_second": 4.039, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 41.46992404023814, |
|
"grad_norm": 0.22818030416965485, |
|
"learning_rate": 8.547623945690188e-05, |
|
"loss": 0.0437, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 41.46992404023814, |
|
"eval_cer": 0.015847156398104266, |
|
"eval_loss": 0.025692187249660492, |
|
"eval_runtime": 22.2761, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 41.572572367070414, |
|
"grad_norm": 0.20878200232982635, |
|
"learning_rate": 8.444764451758897e-05, |
|
"loss": 0.044, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 41.572572367070414, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.025789031758904457, |
|
"eval_runtime": 22.1041, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 41.67522069390269, |
|
"grad_norm": 0.37076711654663086, |
|
"learning_rate": 8.341904957827608e-05, |
|
"loss": 0.0438, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 41.67522069390269, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.025606298819184303, |
|
"eval_runtime": 22.1493, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 41.77786902073496, |
|
"grad_norm": 0.3799729347229004, |
|
"learning_rate": 8.239045463896318e-05, |
|
"loss": 0.0439, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 41.77786902073496, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.02529660426080227, |
|
"eval_runtime": 22.1548, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 41.880517347567235, |
|
"grad_norm": 0.1976720541715622, |
|
"learning_rate": 8.136185969965028e-05, |
|
"loss": 0.0442, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 41.880517347567235, |
|
"eval_cer": 0.013181279620853081, |
|
"eval_loss": 0.025364946573972702, |
|
"eval_runtime": 22.2419, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 41.98316567439951, |
|
"grad_norm": 0.35871848464012146, |
|
"learning_rate": 8.033326476033738e-05, |
|
"loss": 0.0438, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 41.98316567439951, |
|
"eval_cer": 0.016143364928909953, |
|
"eval_loss": 0.02546422928571701, |
|
"eval_runtime": 22.0995, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05361930294906166, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 42.08581400123178, |
|
"grad_norm": 0.3228365480899811, |
|
"learning_rate": 7.930466982102448e-05, |
|
"loss": 0.0435, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 42.08581400123178, |
|
"eval_cer": 0.013921800947867298, |
|
"eval_loss": 0.025534870103001595, |
|
"eval_runtime": 22.0953, |
|
"eval_samples_per_second": 4.073, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 42.188462328064055, |
|
"grad_norm": 0.32005515694618225, |
|
"learning_rate": 7.827607488171158e-05, |
|
"loss": 0.0432, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 42.188462328064055, |
|
"eval_cer": 0.01229265402843602, |
|
"eval_loss": 0.024984827265143394, |
|
"eval_runtime": 22.11, |
|
"eval_samples_per_second": 4.071, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 42.29111065489632, |
|
"grad_norm": 0.2567862570285797, |
|
"learning_rate": 7.724747994239868e-05, |
|
"loss": 0.0435, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 42.29111065489632, |
|
"eval_cer": 0.01525473933649289, |
|
"eval_loss": 0.025618551298975945, |
|
"eval_runtime": 22.2013, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05183199285075961, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 42.393758981728595, |
|
"grad_norm": 0.39079025387763977, |
|
"learning_rate": 7.621888500308578e-05, |
|
"loss": 0.0434, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 42.393758981728595, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.025224734097719193, |
|
"eval_runtime": 21.8997, |
|
"eval_samples_per_second": 4.11, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 42.49640730856087, |
|
"grad_norm": 0.20112274587154388, |
|
"learning_rate": 7.519029006377289e-05, |
|
"loss": 0.0434, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 42.49640730856087, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.02557324431836605, |
|
"eval_runtime": 22.1285, |
|
"eval_samples_per_second": 4.067, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 42.59905563539314, |
|
"grad_norm": 0.29187527298927307, |
|
"learning_rate": 7.416169512445998e-05, |
|
"loss": 0.0433, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 42.59905563539314, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.02564132958650589, |
|
"eval_runtime": 22.0916, |
|
"eval_samples_per_second": 4.074, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 42.701703962225416, |
|
"grad_norm": 0.20293624699115753, |
|
"learning_rate": 7.313310018514709e-05, |
|
"loss": 0.0437, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 42.701703962225416, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.025618135929107666, |
|
"eval_runtime": 22.0252, |
|
"eval_samples_per_second": 4.086, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 42.80435228905769, |
|
"grad_norm": 0.2777237296104431, |
|
"learning_rate": 7.210450524583418e-05, |
|
"loss": 0.0438, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 42.80435228905769, |
|
"eval_cer": 0.01569905213270142, |
|
"eval_loss": 0.025969378650188446, |
|
"eval_runtime": 22.1694, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05272564789991063, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 42.90700061588996, |
|
"grad_norm": 0.29848456382751465, |
|
"learning_rate": 7.107591030652129e-05, |
|
"loss": 0.0436, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 42.90700061588996, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.02571621723473072, |
|
"eval_runtime": 22.2799, |
|
"eval_samples_per_second": 4.04, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 43.00964894272224, |
|
"grad_norm": 0.21752919256687164, |
|
"learning_rate": 7.004731536720838e-05, |
|
"loss": 0.0435, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 43.00964894272224, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.025668691843748093, |
|
"eval_runtime": 22.1447, |
|
"eval_samples_per_second": 4.064, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.05093833780160858, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 43.1122972695545, |
|
"grad_norm": 0.3110567033290863, |
|
"learning_rate": 6.901872042789549e-05, |
|
"loss": 0.0431, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 43.1122972695545, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.025269243866205215, |
|
"eval_runtime": 22.5329, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 43.21494559638678, |
|
"grad_norm": 0.26389312744140625, |
|
"learning_rate": 6.79901254885826e-05, |
|
"loss": 0.0429, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 43.21494559638678, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.025585921481251717, |
|
"eval_runtime": 22.2351, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 43.31759392321905, |
|
"grad_norm": 0.2228713184595108, |
|
"learning_rate": 6.69615305492697e-05, |
|
"loss": 0.043, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 43.31759392321905, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.02543068863451481, |
|
"eval_runtime": 22.1864, |
|
"eval_samples_per_second": 4.057, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 43.420242250051324, |
|
"grad_norm": 0.1402529925107956, |
|
"learning_rate": 6.593293560995681e-05, |
|
"loss": 0.0439, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 43.420242250051324, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.025465745478868484, |
|
"eval_runtime": 22.3826, |
|
"eval_samples_per_second": 4.021, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 43.5228905768836, |
|
"grad_norm": 0.3183715045452118, |
|
"learning_rate": 6.49043406706439e-05, |
|
"loss": 0.0431, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 43.5228905768836, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.025318369269371033, |
|
"eval_runtime": 22.2919, |
|
"eval_samples_per_second": 4.037, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 43.62553890371587, |
|
"grad_norm": 0.29827529191970825, |
|
"learning_rate": 6.387574573133101e-05, |
|
"loss": 0.0434, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 43.62553890371587, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.025606686249375343, |
|
"eval_runtime": 22.0625, |
|
"eval_samples_per_second": 4.079, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 43.728187230548144, |
|
"grad_norm": 0.16081830859184265, |
|
"learning_rate": 6.28471507920181e-05, |
|
"loss": 0.0431, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 43.728187230548144, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.025782672688364983, |
|
"eval_runtime": 22.1883, |
|
"eval_samples_per_second": 4.056, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 43.83083555738042, |
|
"grad_norm": 0.19551779329776764, |
|
"learning_rate": 6.181855585270521e-05, |
|
"loss": 0.0437, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 43.83083555738042, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.025874827057123184, |
|
"eval_runtime": 22.0445, |
|
"eval_samples_per_second": 4.083, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 43.933483884212684, |
|
"grad_norm": 0.3405396044254303, |
|
"learning_rate": 6.07899609133923e-05, |
|
"loss": 0.0431, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 43.933483884212684, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.025233900174498558, |
|
"eval_runtime": 22.1498, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 44.03613221104496, |
|
"grad_norm": 0.1871403306722641, |
|
"learning_rate": 5.97613659740794e-05, |
|
"loss": 0.0424, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 44.03613221104496, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.025231176987290382, |
|
"eval_runtime": 22.1695, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 44.13878053787723, |
|
"grad_norm": 0.272748202085495, |
|
"learning_rate": 5.873277103476652e-05, |
|
"loss": 0.0425, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 44.13878053787723, |
|
"eval_cer": 0.012885071090047393, |
|
"eval_loss": 0.025549624115228653, |
|
"eval_runtime": 22.1325, |
|
"eval_samples_per_second": 4.066, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 44.241428864709505, |
|
"grad_norm": 0.15834620594978333, |
|
"learning_rate": 5.7704176095453617e-05, |
|
"loss": 0.0428, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 44.241428864709505, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.02536383457481861, |
|
"eval_runtime": 22.0624, |
|
"eval_samples_per_second": 4.079, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 44.34407719154178, |
|
"grad_norm": 0.21297834813594818, |
|
"learning_rate": 5.6675581156140717e-05, |
|
"loss": 0.0426, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 44.34407719154178, |
|
"eval_cer": 0.013033175355450236, |
|
"eval_loss": 0.025508729740977287, |
|
"eval_runtime": 22.2302, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 44.44672551837405, |
|
"grad_norm": 0.3164765536785126, |
|
"learning_rate": 5.5646986216827816e-05, |
|
"loss": 0.0428, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 44.44672551837405, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.025067314505577087, |
|
"eval_runtime": 22.074, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 44.549373845206325, |
|
"grad_norm": 0.14805859327316284, |
|
"learning_rate": 5.4618391277514916e-05, |
|
"loss": 0.0435, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 44.549373845206325, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.02515277825295925, |
|
"eval_runtime": 22.2077, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 44.6520221720386, |
|
"grad_norm": 0.19713029265403748, |
|
"learning_rate": 5.3589796338202016e-05, |
|
"loss": 0.0429, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 44.6520221720386, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.02521173469722271, |
|
"eval_runtime": 22.2596, |
|
"eval_samples_per_second": 4.043, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 44.754670498870865, |
|
"grad_norm": 0.2486603856086731, |
|
"learning_rate": 5.2561201398889116e-05, |
|
"loss": 0.0428, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 44.754670498870865, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.025502758100628853, |
|
"eval_runtime": 22.2902, |
|
"eval_samples_per_second": 4.038, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 44.85731882570314, |
|
"grad_norm": 0.29049795866012573, |
|
"learning_rate": 5.153260645957622e-05, |
|
"loss": 0.0425, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 44.85731882570314, |
|
"eval_cer": 0.01525473933649289, |
|
"eval_loss": 0.02529684267938137, |
|
"eval_runtime": 22.181, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 44.95996715253541, |
|
"grad_norm": 0.2436273992061615, |
|
"learning_rate": 5.050401152026332e-05, |
|
"loss": 0.0428, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 44.95996715253541, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.024828782305121422, |
|
"eval_runtime": 22.2395, |
|
"eval_samples_per_second": 4.047, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 45.062615479367686, |
|
"grad_norm": 0.24658174812793732, |
|
"learning_rate": 4.947541658095042e-05, |
|
"loss": 0.0424, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 45.062615479367686, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.025087928399443626, |
|
"eval_runtime": 22.0216, |
|
"eval_samples_per_second": 4.087, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 45.16526380619996, |
|
"grad_norm": 0.39545565843582153, |
|
"learning_rate": 4.844682164163752e-05, |
|
"loss": 0.0423, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 45.16526380619996, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.0248849056661129, |
|
"eval_runtime": 22.0732, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 45.26791213303223, |
|
"grad_norm": 0.3729030191898346, |
|
"learning_rate": 4.741822670232462e-05, |
|
"loss": 0.0421, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 45.26791213303223, |
|
"eval_cer": 0.013773696682464455, |
|
"eval_loss": 0.024852894246578217, |
|
"eval_runtime": 22.1136, |
|
"eval_samples_per_second": 4.07, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 45.37056045986451, |
|
"grad_norm": 0.2696306109428406, |
|
"learning_rate": 4.638963176301172e-05, |
|
"loss": 0.0424, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 45.37056045986451, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.024875616654753685, |
|
"eval_runtime": 22.2425, |
|
"eval_samples_per_second": 4.046, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 45.47320878669678, |
|
"grad_norm": 0.18253710865974426, |
|
"learning_rate": 4.536103682369882e-05, |
|
"loss": 0.0427, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 45.47320878669678, |
|
"eval_cer": 0.013921800947867298, |
|
"eval_loss": 0.02515345811843872, |
|
"eval_runtime": 22.4087, |
|
"eval_samples_per_second": 4.016, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 45.57585711352905, |
|
"grad_norm": 0.5241480469703674, |
|
"learning_rate": 4.433244188438593e-05, |
|
"loss": 0.0424, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 45.57585711352905, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.024990374222397804, |
|
"eval_runtime": 22.5101, |
|
"eval_samples_per_second": 3.998, |
|
"eval_steps_per_second": 0.044, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 45.67850544036132, |
|
"grad_norm": 0.26579299569129944, |
|
"learning_rate": 4.330384694507303e-05, |
|
"loss": 0.0422, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 45.67850544036132, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.025033339858055115, |
|
"eval_runtime": 22.2997, |
|
"eval_samples_per_second": 4.036, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 45.781153767193594, |
|
"grad_norm": 0.15677900612354279, |
|
"learning_rate": 4.2275252005760136e-05, |
|
"loss": 0.0424, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 45.781153767193594, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.025280646979808807, |
|
"eval_runtime": 22.1999, |
|
"eval_samples_per_second": 4.054, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.050044682752457555, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 45.88380209402587, |
|
"grad_norm": 0.25194716453552246, |
|
"learning_rate": 4.1246657066447236e-05, |
|
"loss": 0.0428, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 45.88380209402587, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.02510838583111763, |
|
"eval_runtime": 22.1534, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 45.98645042085814, |
|
"grad_norm": 0.24914862215518951, |
|
"learning_rate": 4.0218062127134336e-05, |
|
"loss": 0.0428, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 45.98645042085814, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.02476254291832447, |
|
"eval_runtime": 22.087, |
|
"eval_samples_per_second": 4.075, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 46.089098747690414, |
|
"grad_norm": 0.318974107503891, |
|
"learning_rate": 3.9189467187821436e-05, |
|
"loss": 0.0421, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 46.089098747690414, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.02481299825012684, |
|
"eval_runtime": 21.9633, |
|
"eval_samples_per_second": 4.098, |
|
"eval_steps_per_second": 0.046, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 46.19174707452269, |
|
"grad_norm": 0.21947523951530457, |
|
"learning_rate": 3.816087224850854e-05, |
|
"loss": 0.0421, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 46.19174707452269, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.025007640942931175, |
|
"eval_runtime": 22.0576, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 46.29439540135496, |
|
"grad_norm": 0.2869652807712555, |
|
"learning_rate": 3.713227730919564e-05, |
|
"loss": 0.0417, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 46.29439540135496, |
|
"eval_cer": 0.014069905213270142, |
|
"eval_loss": 0.025139357894659042, |
|
"eval_runtime": 22.1428, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 46.39704372818723, |
|
"grad_norm": 0.24226853251457214, |
|
"learning_rate": 3.610368236988274e-05, |
|
"loss": 0.0423, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 46.39704372818723, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.02465611696243286, |
|
"eval_runtime": 22.2059, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 46.4996920550195, |
|
"grad_norm": 0.508613646030426, |
|
"learning_rate": 3.507508743056984e-05, |
|
"loss": 0.0422, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 46.4996920550195, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.024790233001112938, |
|
"eval_runtime": 22.0074, |
|
"eval_samples_per_second": 4.09, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 46.602340381851775, |
|
"grad_norm": 0.22070400416851044, |
|
"learning_rate": 3.404649249125694e-05, |
|
"loss": 0.0415, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 46.602340381851775, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.024966726079583168, |
|
"eval_runtime": 22.0647, |
|
"eval_samples_per_second": 4.079, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 46.70498870868405, |
|
"grad_norm": 0.19661836326122284, |
|
"learning_rate": 3.301789755194404e-05, |
|
"loss": 0.0424, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 46.70498870868405, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.024841254577040672, |
|
"eval_runtime": 22.1576, |
|
"eval_samples_per_second": 4.062, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 46.80763703551632, |
|
"grad_norm": 0.475782185792923, |
|
"learning_rate": 3.198930261263114e-05, |
|
"loss": 0.0424, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 46.80763703551632, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.02477007918059826, |
|
"eval_runtime": 22.1397, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 46.910285362348596, |
|
"grad_norm": 0.36696240305900574, |
|
"learning_rate": 3.096070767331825e-05, |
|
"loss": 0.0426, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 46.910285362348596, |
|
"eval_cer": 0.013625592417061612, |
|
"eval_loss": 0.024692127481102943, |
|
"eval_runtime": 22.0727, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 47.01293368918087, |
|
"grad_norm": 0.1705227941274643, |
|
"learning_rate": 2.993211273400535e-05, |
|
"loss": 0.0422, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 47.01293368918087, |
|
"eval_cer": 0.014514218009478674, |
|
"eval_loss": 0.024774568155407906, |
|
"eval_runtime": 22.1972, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 47.115582016013136, |
|
"grad_norm": 0.21439406275749207, |
|
"learning_rate": 2.8903517794692452e-05, |
|
"loss": 0.0418, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 47.115582016013136, |
|
"eval_cer": 0.015550947867298577, |
|
"eval_loss": 0.024719279259443283, |
|
"eval_runtime": 22.0766, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 47.21823034284541, |
|
"grad_norm": 0.34435534477233887, |
|
"learning_rate": 2.7874922855379552e-05, |
|
"loss": 0.0415, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 47.21823034284541, |
|
"eval_cer": 0.015550947867298577, |
|
"eval_loss": 0.024668768048286438, |
|
"eval_runtime": 22.0611, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 47.32087866967768, |
|
"grad_norm": 0.25130486488342285, |
|
"learning_rate": 2.6846327916066652e-05, |
|
"loss": 0.0417, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 47.32087866967768, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.024554278701543808, |
|
"eval_runtime": 22.0634, |
|
"eval_samples_per_second": 4.079, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 47.423526996509956, |
|
"grad_norm": 0.29562491178512573, |
|
"learning_rate": 2.5817732976753755e-05, |
|
"loss": 0.0414, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 47.423526996509956, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.02472042851150036, |
|
"eval_runtime": 22.1921, |
|
"eval_samples_per_second": 4.055, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.043789097408400354, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 47.52617532334223, |
|
"grad_norm": 0.4088131785392761, |
|
"learning_rate": 2.4789138037440855e-05, |
|
"loss": 0.0421, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 47.52617532334223, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.024495158344507217, |
|
"eval_runtime": 22.2123, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 47.6288236501745, |
|
"grad_norm": 0.27792465686798096, |
|
"learning_rate": 2.3760543098127955e-05, |
|
"loss": 0.0419, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 47.6288236501745, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.02450607530772686, |
|
"eval_runtime": 22.2255, |
|
"eval_samples_per_second": 4.049, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 47.73147197700678, |
|
"grad_norm": 0.19159696996212006, |
|
"learning_rate": 2.2731948158815062e-05, |
|
"loss": 0.042, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 47.73147197700678, |
|
"eval_cer": 0.014218009478672985, |
|
"eval_loss": 0.02452634647488594, |
|
"eval_runtime": 22.143, |
|
"eval_samples_per_second": 4.064, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.044682752457551385, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 47.83412030383905, |
|
"grad_norm": 0.2656868100166321, |
|
"learning_rate": 2.1703353219502162e-05, |
|
"loss": 0.0412, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 47.83412030383905, |
|
"eval_cer": 0.015550947867298577, |
|
"eval_loss": 0.024535449221730232, |
|
"eval_runtime": 22.2342, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.049151027703306524, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 47.93676863067132, |
|
"grad_norm": 0.15053987503051758, |
|
"learning_rate": 2.0674758280189262e-05, |
|
"loss": 0.0416, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 47.93676863067132, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.024381397292017937, |
|
"eval_runtime": 22.2314, |
|
"eval_samples_per_second": 4.048, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 48.03941695750359, |
|
"grad_norm": 0.2812643051147461, |
|
"learning_rate": 1.9646163340876362e-05, |
|
"loss": 0.0417, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 48.03941695750359, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.02442990057170391, |
|
"eval_runtime": 22.3372, |
|
"eval_samples_per_second": 4.029, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 48.142065284335864, |
|
"grad_norm": 0.31642502546310425, |
|
"learning_rate": 1.8617568401563465e-05, |
|
"loss": 0.0419, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 48.142065284335864, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.024683654308319092, |
|
"eval_runtime": 22.1497, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 48.24471361116814, |
|
"grad_norm": 0.24842867255210876, |
|
"learning_rate": 1.7588973462250565e-05, |
|
"loss": 0.0413, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 48.24471361116814, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.0246568750590086, |
|
"eval_runtime": 22.1198, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 48.34736193800041, |
|
"grad_norm": 0.22725574672222137, |
|
"learning_rate": 1.6560378522937665e-05, |
|
"loss": 0.0417, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 48.34736193800041, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.024483025074005127, |
|
"eval_runtime": 22.0753, |
|
"eval_samples_per_second": 4.077, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 48.450010264832684, |
|
"grad_norm": 0.415797621011734, |
|
"learning_rate": 1.553178358362477e-05, |
|
"loss": 0.0412, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 48.450010264832684, |
|
"eval_cer": 0.014366113744075829, |
|
"eval_loss": 0.024604879319667816, |
|
"eval_runtime": 22.2084, |
|
"eval_samples_per_second": 4.053, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.045576407506702415, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 48.55265859166496, |
|
"grad_norm": 0.4157191216945648, |
|
"learning_rate": 1.450318864431187e-05, |
|
"loss": 0.0416, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 48.55265859166496, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.024540413171052933, |
|
"eval_runtime": 21.9902, |
|
"eval_samples_per_second": 4.093, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 48.65530691849723, |
|
"grad_norm": 0.20073458552360535, |
|
"learning_rate": 1.3474593704998972e-05, |
|
"loss": 0.0413, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 48.65530691849723, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.02452738583087921, |
|
"eval_runtime": 22.0306, |
|
"eval_samples_per_second": 4.085, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 48.7579552453295, |
|
"grad_norm": 0.3352334499359131, |
|
"learning_rate": 1.2445998765686073e-05, |
|
"loss": 0.0413, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 48.7579552453295, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.024565977975726128, |
|
"eval_runtime": 22.1491, |
|
"eval_samples_per_second": 4.063, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 48.86060357216177, |
|
"grad_norm": 0.20034602284431458, |
|
"learning_rate": 1.1417403826373175e-05, |
|
"loss": 0.0418, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 48.86060357216177, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.024530308321118355, |
|
"eval_runtime": 22.0802, |
|
"eval_samples_per_second": 4.076, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 48.963251898994045, |
|
"grad_norm": 0.204274982213974, |
|
"learning_rate": 1.0388808887060275e-05, |
|
"loss": 0.0417, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 48.963251898994045, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.02437027543783188, |
|
"eval_runtime": 22.051, |
|
"eval_samples_per_second": 4.081, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 49.06590022582632, |
|
"grad_norm": 0.27974188327789307, |
|
"learning_rate": 9.360213947747377e-06, |
|
"loss": 0.0417, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 49.06590022582632, |
|
"eval_cer": 0.015402843601895734, |
|
"eval_loss": 0.024484841153025627, |
|
"eval_runtime": 22.1209, |
|
"eval_samples_per_second": 4.069, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04825737265415549, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 49.16854855265859, |
|
"grad_norm": 0.15514741837978363, |
|
"learning_rate": 8.33161900843448e-06, |
|
"loss": 0.0413, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 49.16854855265859, |
|
"eval_cer": 0.014958530805687204, |
|
"eval_loss": 0.024350464344024658, |
|
"eval_runtime": 21.9927, |
|
"eval_samples_per_second": 4.092, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 49.271196879490866, |
|
"grad_norm": 0.13631823658943176, |
|
"learning_rate": 7.30302406912158e-06, |
|
"loss": 0.0413, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 49.271196879490866, |
|
"eval_cer": 0.015106635071090047, |
|
"eval_loss": 0.024330323562026024, |
|
"eval_runtime": 22.2136, |
|
"eval_samples_per_second": 4.052, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 49.37384520632314, |
|
"grad_norm": 0.18198832869529724, |
|
"learning_rate": 6.274429129808681e-06, |
|
"loss": 0.0409, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 49.37384520632314, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.024377064779400826, |
|
"eval_runtime": 22.1427, |
|
"eval_samples_per_second": 4.065, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 49.47649353315541, |
|
"grad_norm": 0.20921219885349274, |
|
"learning_rate": 5.245834190495783e-06, |
|
"loss": 0.0413, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 49.47649353315541, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.024373607710003853, |
|
"eval_runtime": 22.062, |
|
"eval_samples_per_second": 4.079, |
|
"eval_steps_per_second": 0.045, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 49.57914185998768, |
|
"grad_norm": 0.24988599121570587, |
|
"learning_rate": 4.217239251182884e-06, |
|
"loss": 0.0414, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 49.57914185998768, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.024304602295160294, |
|
"eval_runtime": 12.6617, |
|
"eval_samples_per_second": 7.108, |
|
"eval_steps_per_second": 0.079, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 49.68179018681995, |
|
"grad_norm": 0.22157305479049683, |
|
"learning_rate": 3.1886443118699856e-06, |
|
"loss": 0.0407, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 49.68179018681995, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.02429259568452835, |
|
"eval_runtime": 12.7319, |
|
"eval_samples_per_second": 7.069, |
|
"eval_steps_per_second": 0.079, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 49.784438513652226, |
|
"grad_norm": 0.18318428099155426, |
|
"learning_rate": 2.1600493725570872e-06, |
|
"loss": 0.0413, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 49.784438513652226, |
|
"eval_cer": 0.014662322274881517, |
|
"eval_loss": 0.024326322600245476, |
|
"eval_runtime": 12.7187, |
|
"eval_samples_per_second": 7.076, |
|
"eval_steps_per_second": 0.079, |
|
"eval_wer": 0.04647006255585344, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 49.8870868404845, |
|
"grad_norm": 0.23897279798984528, |
|
"learning_rate": 1.1314544332441885e-06, |
|
"loss": 0.0416, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 49.8870868404845, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.024302508682012558, |
|
"eval_runtime": 12.6346, |
|
"eval_samples_per_second": 7.123, |
|
"eval_steps_per_second": 0.079, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 49.98973516731677, |
|
"grad_norm": 0.2672029733657837, |
|
"learning_rate": 1.0285949393128985e-07, |
|
"loss": 0.0416, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 49.98973516731677, |
|
"eval_cer": 0.01481042654028436, |
|
"eval_loss": 0.024304790422320366, |
|
"eval_runtime": 12.659, |
|
"eval_samples_per_second": 7.11, |
|
"eval_steps_per_second": 0.079, |
|
"eval_wer": 0.04736371760500447, |
|
"step": 487000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 487100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 4000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.703420432200581e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|