|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 35.0, |
|
"eval_steps": 500, |
|
"global_step": 57785, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06056935190793458, |
|
"grad_norm": 12.987489700317383, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 23.1996, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12113870381586916, |
|
        "grad_norm": null,
|
"learning_rate": 9.7e-06, |
|
"loss": 21.235, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.18170805572380375, |
|
"grad_norm": 8.480514526367188, |
|
"learning_rate": 1.47e-05, |
|
"loss": 9.7594, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24227740763173833, |
|
"grad_norm": 1.9446700811386108, |
|
"learning_rate": 1.97e-05, |
|
"loss": 4.2771, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.30284675953967294, |
|
        "grad_norm": null,
|
"learning_rate": 2.465e-05, |
|
"loss": 3.8795, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.30284675953967294, |
|
"eval_cer": 0.9859702765384335, |
|
"eval_loss": 3.786914110183716, |
|
"eval_runtime": 159.869, |
|
"eval_samples_per_second": 19.616, |
|
"eval_steps_per_second": 1.226, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3634161114476075, |
|
        "grad_norm": null,
|
"learning_rate": 2.96e-05, |
|
"loss": 3.7933, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4239854633555421, |
|
        "grad_norm": null,
|
"learning_rate": 3.455e-05, |
|
"loss": 3.6564, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.48455481526347666, |
|
"grad_norm": 2.748781442642212, |
|
"learning_rate": 3.9500000000000005e-05, |
|
"loss": 3.0316, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5451241671714112, |
|
"grad_norm": 1.7328834533691406, |
|
"learning_rate": 4.4500000000000004e-05, |
|
"loss": 2.2936, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6056935190793459, |
|
        "grad_norm": null,
|
"learning_rate": 4.945e-05, |
|
"loss": 1.8805, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6056935190793459, |
|
"eval_cer": 0.4123792289372146, |
|
"eval_loss": 2.042300224304199, |
|
"eval_runtime": 157.3071, |
|
"eval_samples_per_second": 19.936, |
|
"eval_steps_per_second": 1.246, |
|
"eval_wer": 0.6415560932805063, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6662628709872804, |
|
"grad_norm": 2.0204882621765137, |
|
"learning_rate": 5.445e-05, |
|
"loss": 1.7284, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.726832222895215, |
|
        "grad_norm": null,
|
"learning_rate": 5.94e-05, |
|
"loss": 1.6213, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7874015748031497, |
|
"grad_norm": 1.818031907081604, |
|
"learning_rate": 6.440000000000001e-05, |
|
"loss": 1.6123, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8479709267110842, |
|
        "grad_norm": null,
|
"learning_rate": 6.935e-05, |
|
"loss": 1.5788, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9085402786190188, |
|
"grad_norm": 4.519701957702637, |
|
"learning_rate": 7.435e-05, |
|
"loss": 1.5823, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9085402786190188, |
|
"eval_cer": 0.37009765685938034, |
|
"eval_loss": 1.7621949911117554, |
|
"eval_runtime": 159.7287, |
|
"eval_samples_per_second": 19.633, |
|
"eval_steps_per_second": 1.227, |
|
"eval_wer": 0.5792006807243333, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9691096305269533, |
|
"grad_norm": 2.352220296859741, |
|
"learning_rate": 7.935e-05, |
|
"loss": 1.6437, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.029678982434888, |
|
"grad_norm": 1.7396734952926636, |
|
"learning_rate": 8.435e-05, |
|
"loss": 1.5773, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.0902483343428226, |
|
"grad_norm": 5.526904582977295, |
|
"learning_rate": 8.93e-05, |
|
"loss": 1.6536, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.150817686250757, |
|
"grad_norm": 1.5785249471664429, |
|
"learning_rate": 9.425e-05, |
|
"loss": 1.8976, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.2113870381586918, |
|
"grad_norm": 1.2670302391052246, |
|
"learning_rate": 9.925000000000001e-05, |
|
"loss": 2.2702, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2113870381586918, |
|
"eval_cer": 0.5232754048235625, |
|
"eval_loss": 2.059516668319702, |
|
"eval_runtime": 156.6202, |
|
"eval_samples_per_second": 20.023, |
|
"eval_steps_per_second": 1.251, |
|
"eval_wer": 0.844177945595235, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2719563900666264, |
|
"grad_norm": 1.9519144296646118, |
|
"learning_rate": 9.984762929102805e-05, |
|
"loss": 2.1941, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.3325257419745609, |
|
"grad_norm": 5.191477298736572, |
|
"learning_rate": 9.967016222999016e-05, |
|
"loss": 2.3686, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.3930950938824955, |
|
"grad_norm": 1.6009842157363892, |
|
"learning_rate": 9.949269516895223e-05, |
|
"loss": 2.728, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.45366444579043, |
|
"grad_norm": 1.253065586090088, |
|
"learning_rate": 9.931522810791432e-05, |
|
"loss": 2.5374, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.5142337976983646, |
|
"grad_norm": 2.445673942565918, |
|
"learning_rate": 9.913596845030027e-05, |
|
"loss": 2.7429, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.5142337976983646, |
|
"eval_cer": 0.8706072613438999, |
|
"eval_loss": 2.9181418418884277, |
|
"eval_runtime": 159.742, |
|
"eval_samples_per_second": 19.632, |
|
"eval_steps_per_second": 1.227, |
|
"eval_wer": 0.9791528172946526, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.574803149606299, |
|
"grad_norm": 0.6720703840255737, |
|
"learning_rate": 9.895670879268621e-05, |
|
"loss": 2.9946, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.6353725015142337, |
|
"grad_norm": 1.736120581626892, |
|
"learning_rate": 9.877744913507216e-05, |
|
"loss": 3.1198, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.6959418534221684, |
|
"grad_norm": 1.216760277748108, |
|
"learning_rate": 9.859998207403425e-05, |
|
"loss": 3.1313, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.7565112053301029, |
|
"grad_norm": 0.8678397536277771, |
|
"learning_rate": 9.842430760957248e-05, |
|
"loss": 3.1411, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.8170805572380375, |
|
"grad_norm": 0.7253884077072144, |
|
"learning_rate": 9.824504795195841e-05, |
|
"loss": 3.1077, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8170805572380375, |
|
"eval_cer": 0.9060923736128877, |
|
"eval_loss": 3.039332866668701, |
|
"eval_runtime": 158.2417, |
|
"eval_samples_per_second": 19.818, |
|
"eval_steps_per_second": 1.239, |
|
"eval_wer": 0.9898157257957295, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8776499091459722, |
|
"grad_norm": 1.5997949838638306, |
|
"learning_rate": 9.806578829434437e-05, |
|
"loss": 3.0864, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.9382192610539066, |
|
"grad_norm": 2.8447787761688232, |
|
"learning_rate": 9.788832123330644e-05, |
|
"loss": 3.0468, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.9987886129618413, |
|
"grad_norm": 1.899368405342102, |
|
"learning_rate": 9.770906157569239e-05, |
|
"loss": 3.0039, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.059357964869776, |
|
"grad_norm": 2.3413126468658447, |
|
"learning_rate": 9.752980191807835e-05, |
|
"loss": 3.0061, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.1199273167777104, |
|
"grad_norm": 1.4248082637786865, |
|
"learning_rate": 9.735233485704042e-05, |
|
"loss": 2.9896, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.1199273167777104, |
|
"eval_cer": 0.852750304202677, |
|
"eval_loss": 2.858107328414917, |
|
"eval_runtime": 154.1563, |
|
"eval_samples_per_second": 20.343, |
|
"eval_steps_per_second": 1.271, |
|
"eval_wer": 0.9778498683755684, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.1804966686856453, |
|
"grad_norm": 1.692192554473877, |
|
"learning_rate": 9.717486779600251e-05, |
|
"loss": 2.9576, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.2410660205935797, |
|
"grad_norm": 2.209773302078247, |
|
"learning_rate": 9.69974007349646e-05, |
|
"loss": 3.0105, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.301635372501514, |
|
"grad_norm": 3.159461259841919, |
|
"learning_rate": 9.681993367392669e-05, |
|
"loss": 3.0166, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.362204724409449, |
|
"grad_norm": 1.4568028450012207, |
|
"learning_rate": 9.664067401631263e-05, |
|
"loss": 3.1414, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.4227740763173835, |
|
"grad_norm": 3.3971126079559326, |
|
"learning_rate": 9.646320695527472e-05, |
|
"loss": 3.2643, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.4227740763173835, |
|
"eval_cer": 0.8024866618826245, |
|
"eval_loss": 3.0456228256225586, |
|
"eval_runtime": 155.1523, |
|
"eval_samples_per_second": 20.212, |
|
"eval_steps_per_second": 1.263, |
|
"eval_wer": 0.9648735607732603, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.483343428225318, |
|
"grad_norm": 3.6780447959899902, |
|
"learning_rate": 9.628394729766067e-05, |
|
"loss": 3.315, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.543912780133253, |
|
"grad_norm": 2.1955392360687256, |
|
"learning_rate": 9.610468764004662e-05, |
|
"loss": 3.4117, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.6044821320411873, |
|
"grad_norm": 4.204680919647217, |
|
"learning_rate": 9.592542798243256e-05, |
|
"loss": 3.4612, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.6650514839491217, |
|
"grad_norm": 3.945098400115967, |
|
"learning_rate": 9.574616832481851e-05, |
|
"loss": 3.4925, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.725620835857056, |
|
"grad_norm": 4.762417316436768, |
|
"learning_rate": 9.556690866720444e-05, |
|
"loss": 3.6542, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.725620835857056, |
|
"eval_cer": 0.8007810468732125, |
|
"eval_loss": 3.4605765342712402, |
|
"eval_runtime": 156.3504, |
|
"eval_samples_per_second": 20.058, |
|
"eval_steps_per_second": 1.254, |
|
"eval_wer": 0.9658042385726061, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.786190187764991, |
|
"grad_norm": 2.70656418800354, |
|
"learning_rate": 9.53876490095904e-05, |
|
"loss": 3.7718, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.8467595396729255, |
|
"grad_norm": 4.134393215179443, |
|
"learning_rate": 9.521197454512862e-05, |
|
"loss": 3.7792, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.90732889158086, |
|
"grad_norm": 3.5866096019744873, |
|
"learning_rate": 9.50345074840907e-05, |
|
"loss": 3.817, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.967898243488795, |
|
"grad_norm": 2.202087163925171, |
|
"learning_rate": 9.485524782647665e-05, |
|
"loss": 3.7782, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.0284675953967293, |
|
"grad_norm": 0.6978887915611267, |
|
"learning_rate": 9.467778076543874e-05, |
|
"loss": 3.7622, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.0284675953967293, |
|
"eval_cer": 0.8315237174087132, |
|
"eval_loss": 3.647613286972046, |
|
"eval_runtime": 156.6955, |
|
"eval_samples_per_second": 20.013, |
|
"eval_steps_per_second": 1.251, |
|
"eval_wer": 0.9835137075544447, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.0890369473046637, |
|
"grad_norm": 0.5634181499481201, |
|
"learning_rate": 9.449852110782469e-05, |
|
"loss": 3.7553, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.1496062992125986, |
|
"grad_norm": 0.5986543297767639, |
|
"learning_rate": 9.431926145021063e-05, |
|
"loss": 3.79, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.210175651120533, |
|
"grad_norm": 0.4283181428909302, |
|
"learning_rate": 9.414000179259658e-05, |
|
"loss": 3.7977, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.2707450030284675, |
|
"grad_norm": 0.44843679666519165, |
|
"learning_rate": 9.396253473155867e-05, |
|
"loss": 3.7977, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.331314354936402, |
|
"grad_norm": 0.09695058315992355, |
|
"learning_rate": 9.378327507394461e-05, |
|
"loss": 3.8614, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.331314354936402, |
|
"eval_cer": 0.862848793069379, |
|
"eval_loss": 3.832617998123169, |
|
"eval_runtime": 156.1805, |
|
"eval_samples_per_second": 20.079, |
|
"eval_steps_per_second": 1.255, |
|
"eval_wer": 0.992421623633898, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.391883706844337, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.36058080129067e-05, |
|
"loss": 3.925, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.4524530587522713, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.342834095186879e-05, |
|
"loss": 3.9914, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.5130224106602057, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.324908129425474e-05, |
|
"loss": 4.0178, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.5735917625681406, |
|
"grad_norm": 0.5871341824531555, |
|
"learning_rate": 9.306982163664067e-05, |
|
"loss": 4.0331, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.634161114476075, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.289056197902663e-05, |
|
"loss": 3.9769, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.634161114476075, |
|
"eval_cer": 0.8807837508970079, |
|
"eval_loss": 4.00545072555542, |
|
"eval_runtime": 156.4676, |
|
"eval_samples_per_second": 20.042, |
|
"eval_steps_per_second": 1.253, |
|
"eval_wer": 0.9953200202090037, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.6947304663840095, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.271130232141257e-05, |
|
"loss": 3.9658, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.7552998182919444, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.253204266379851e-05, |
|
"loss": 4.1082, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.815869170199879, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.235457560276061e-05, |
|
"loss": 4.1125, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.8764385221078133, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.217890113829884e-05, |
|
"loss": 4.1026, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.937007874015748, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.199964148068477e-05, |
|
"loss": 4.1241, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.937007874015748, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 155.6186, |
|
"eval_samples_per_second": 20.152, |
|
"eval_steps_per_second": 1.259, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.9975772259236826, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.182217441964687e-05, |
|
"loss": 4.1577, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.058146577831617, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.16429147620328e-05, |
|
"loss": 4.1595, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.118715929739552, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.146365510441875e-05, |
|
"loss": 4.1332, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.179285281647486, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.128618804338084e-05, |
|
"loss": 4.0768, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.239854633555421, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.110692838576679e-05, |
|
"loss": 4.1261, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.239854633555421, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 162.2323, |
|
"eval_samples_per_second": 19.33, |
|
"eval_steps_per_second": 1.208, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.300423985463356, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.092946132472888e-05, |
|
"loss": 4.1652, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.360993337371291, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.075199426369096e-05, |
|
"loss": 4.1204, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.421562689279225, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.057273460607691e-05, |
|
"loss": 4.0598, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.4821320411871595, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.039347494846286e-05, |
|
"loss": 4.0921, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.5427013930950935, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.02142152908488e-05, |
|
"loss": 4.1009, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.5427013930950935, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.892, |
|
"eval_samples_per_second": 19.988, |
|
"eval_steps_per_second": 1.249, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.603270745003028, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.003495563323473e-05, |
|
"loss": 4.0789, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.663840096910963, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.985748857219684e-05, |
|
"loss": 4.1577, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.724409448818898, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.968002151115892e-05, |
|
"loss": 4.1059, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.784978800726832, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.950076185354487e-05, |
|
"loss": 4.1344, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 4.845548152634767, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.93250873890831e-05, |
|
"loss": 4.1698, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.845548152634767, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 154.4027, |
|
"eval_samples_per_second": 20.311, |
|
"eval_steps_per_second": 1.269, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.906117504542701, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.914582773146903e-05, |
|
"loss": 4.0951, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.966686856450636, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.896836067043112e-05, |
|
"loss": 4.1298, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.027256208358571, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.878910101281707e-05, |
|
"loss": 4.123, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 5.087825560266505, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.860984135520301e-05, |
|
"loss": 4.0479, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 5.14839491217444, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.84323742941651e-05, |
|
"loss": 4.129, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.14839491217444, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 161.111, |
|
"eval_samples_per_second": 19.465, |
|
"eval_steps_per_second": 1.217, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.208964264082375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.825669982970333e-05, |
|
"loss": 4.107, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 5.269533615990309, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.807744017208928e-05, |
|
"loss": 4.1253, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 5.3301029678982434, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.789997311105135e-05, |
|
"loss": 4.0814, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.390672319806178, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.772250605001346e-05, |
|
"loss": 4.1564, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 5.451241671714112, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.754324639239939e-05, |
|
"loss": 4.1413, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.451241671714112, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 155.3438, |
|
"eval_samples_per_second": 20.187, |
|
"eval_steps_per_second": 1.262, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.511811023622047, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.736398673478535e-05, |
|
"loss": 4.1572, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 5.572380375529982, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.71847270771713e-05, |
|
"loss": 4.1191, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.632949727437916, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.700546741955723e-05, |
|
"loss": 4.1569, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 5.693519079345851, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.682620776194317e-05, |
|
"loss": 4.1067, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.754088431253786, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.664694810432913e-05, |
|
"loss": 4.122, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.754088431253786, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.0033, |
|
"eval_samples_per_second": 19.974, |
|
"eval_steps_per_second": 1.248, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.81465778316172, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.646768844671507e-05, |
|
"loss": 4.0939, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 5.875227135069655, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.628842878910101e-05, |
|
"loss": 4.07, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 5.93579648697759, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.610916913148697e-05, |
|
"loss": 4.1039, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 5.996365838885524, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.593170207044905e-05, |
|
"loss": 4.1332, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 6.0569351907934585, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.575423500941113e-05, |
|
"loss": 4.1652, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.0569351907934585, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.7426, |
|
"eval_samples_per_second": 19.88, |
|
"eval_steps_per_second": 1.243, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.117504542701393, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.557497535179708e-05, |
|
"loss": 4.1403, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 6.178073894609327, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.539571569418303e-05, |
|
"loss": 4.0656, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 6.238643246517262, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.521645603656897e-05, |
|
"loss": 4.1412, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 6.299212598425197, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.503898897553106e-05, |
|
"loss": 4.1346, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 6.359781950333131, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.486152191449315e-05, |
|
"loss": 4.1801, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.359781950333131, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 154.332, |
|
"eval_samples_per_second": 20.32, |
|
"eval_steps_per_second": 1.27, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.420351302241066, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.468405485345522e-05, |
|
"loss": 4.0912, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 6.480920654149001, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.450479519584118e-05, |
|
"loss": 4.0902, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 6.541490006056935, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.432732813480326e-05, |
|
"loss": 4.1044, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 6.60205935796487, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.414806847718922e-05, |
|
"loss": 4.113, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 6.662628709872804, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.396880881957516e-05, |
|
"loss": 4.092, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.662628709872804, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 155.9259, |
|
"eval_samples_per_second": 20.112, |
|
"eval_steps_per_second": 1.257, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.723198061780739, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.37895491619611e-05, |
|
"loss": 4.1542, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 6.783767413688674, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.36120821009232e-05, |
|
"loss": 4.1555, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 6.8443367655966085, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.343461503988527e-05, |
|
"loss": 4.1089, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 6.9049061175045425, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.325714797884736e-05, |
|
"loss": 4.1575, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 6.965475469412477, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.307788832123331e-05, |
|
"loss": 4.0204, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.965475469412477, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.0735, |
|
"eval_samples_per_second": 19.965, |
|
"eval_steps_per_second": 1.248, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 7.026044821320412, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.290221385677154e-05, |
|
"loss": 4.112, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 7.086614173228346, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.272295419915748e-05, |
|
"loss": 4.0831, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 7.147183525136281, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.254369454154343e-05, |
|
"loss": 4.1267, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 7.207752877044216, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.236443488392938e-05, |
|
"loss": 4.0923, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 7.26832222895215, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.218696782289145e-05, |
|
"loss": 4.1036, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.26832222895215, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.0271, |
|
"eval_samples_per_second": 19.971, |
|
"eval_steps_per_second": 1.248, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.328891580860085, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.200770816527741e-05, |
|
"loss": 4.1566, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 7.389460932768019, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.182844850766336e-05, |
|
"loss": 4.1608, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 7.450030284675954, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.165098144662545e-05, |
|
"loss": 4.1211, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 7.510599636583889, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.147351438558752e-05, |
|
"loss": 4.0681, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 7.571168988491824, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.129604732454962e-05, |
|
"loss": 4.1918, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 7.571168988491824, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.5121, |
|
"eval_samples_per_second": 20.037, |
|
"eval_steps_per_second": 1.252, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 7.631738340399758, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.11185802635117e-05, |
|
"loss": 4.0983, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.094111320247379e-05, |
|
"loss": 4.1413, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 7.7528770442156265, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.076185354485973e-05, |
|
"loss": 4.131, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 7.813446396123561, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.058438648382182e-05, |
|
"loss": 4.1407, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 7.874015748031496, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.040512682620777e-05, |
|
"loss": 4.1059, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.874015748031496, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 158.1021, |
|
"eval_samples_per_second": 19.835, |
|
"eval_steps_per_second": 1.24, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.93458509993943, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.022586716859371e-05, |
|
"loss": 4.1219, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 7.995154451847365, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.004660751097966e-05, |
|
"loss": 4.1108, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 8.0557238037553, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.986914044994175e-05, |
|
"loss": 4.1494, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 8.116293155663234, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.968988079232769e-05, |
|
"loss": 4.1274, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 8.176862507571169, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.951062113471364e-05, |
|
"loss": 4.0833, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 8.176862507571169, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 155.4522, |
|
"eval_samples_per_second": 20.173, |
|
"eval_steps_per_second": 1.261, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 8.237431859479104, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.933136147709959e-05, |
|
"loss": 4.0988, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 8.298001211387039, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.915389441606167e-05, |
|
"loss": 4.1511, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 8.358570563294972, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.897642735502375e-05, |
|
"loss": 4.1235, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 8.419139915202907, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.879716769740971e-05, |
|
"loss": 4.0692, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 8.479709267110842, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.861970063637178e-05, |
|
"loss": 4.1278, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.479709267110842, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 154.1431, |
|
"eval_samples_per_second": 20.345, |
|
"eval_steps_per_second": 1.272, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.540278619018776, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.844044097875773e-05, |
|
"loss": 4.0693, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 8.600847970926711, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.826297391771982e-05, |
|
"loss": 4.1555, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 8.661417322834646, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.808371426010576e-05, |
|
"loss": 4.0988, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 8.721986674742581, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.790445460249172e-05, |
|
"loss": 4.0971, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 8.782556026650514, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.77269875414538e-05, |
|
"loss": 4.1365, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 8.782556026650514, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.2386, |
|
"eval_samples_per_second": 19.944, |
|
"eval_steps_per_second": 1.247, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 8.84312537855845, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.754772788383974e-05, |
|
"loss": 4.1029, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 8.903694730466384, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.737026082280183e-05, |
|
"loss": 4.1741, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 8.964264082374319, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.719100116518778e-05, |
|
"loss": 4.146, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 9.024833434282254, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.701174150757372e-05, |
|
"loss": 4.1374, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 9.085402786190187, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.683248184995967e-05, |
|
"loss": 4.1201, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 9.085402786190187, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 153.8308, |
|
"eval_samples_per_second": 20.386, |
|
"eval_steps_per_second": 1.274, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 9.145972138098122, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.665501478892176e-05, |
|
"loss": 4.1728, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 9.206541490006057, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.64757551313077e-05, |
|
"loss": 4.1121, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 9.267110841913992, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.629828807026978e-05, |
|
"loss": 4.1136, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 9.327680193821926, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.611902841265574e-05, |
|
"loss": 4.1399, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 9.388249545729861, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.593976875504169e-05, |
|
"loss": 4.1476, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 9.388249545729861, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.9225, |
|
"eval_samples_per_second": 19.858, |
|
"eval_steps_per_second": 1.241, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 9.448818897637794, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.576050909742762e-05, |
|
"loss": 4.1324, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 9.50938824954573, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.558304203638972e-05, |
|
"loss": 4.0856, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 9.569957601453664, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.54055749753518e-05, |
|
"loss": 4.1506, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 9.6305269533616, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.522631531773776e-05, |
|
"loss": 4.0439, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 9.691096305269534, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.504705566012369e-05, |
|
"loss": 4.0935, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.691096305269534, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.3971, |
|
"eval_samples_per_second": 19.924, |
|
"eval_steps_per_second": 1.245, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 9.751665657177469, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.486779600250963e-05, |
|
"loss": 4.1423, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 9.812235009085402, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.46885363448956e-05, |
|
"loss": 4.0898, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 9.872804360993337, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.451106928385767e-05, |
|
"loss": 4.106, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 9.933373712901272, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.433360222281976e-05, |
|
"loss": 4.0925, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 9.993943064809207, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.415613516178184e-05, |
|
"loss": 4.1109, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 9.993943064809207, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.6444, |
|
"eval_samples_per_second": 19.893, |
|
"eval_steps_per_second": 1.243, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 10.054512416717142, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.397866810074393e-05, |
|
"loss": 4.1463, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 10.115081768625076, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.379940844312988e-05, |
|
"loss": 4.0826, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 10.17565112053301, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.362014878551583e-05, |
|
"loss": 4.097, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 10.236220472440944, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.344088912790177e-05, |
|
"loss": 4.1438, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 10.29678982434888, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.326162947028772e-05, |
|
"loss": 4.1389, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 10.29678982434888, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.2298, |
|
"eval_samples_per_second": 20.073, |
|
"eval_steps_per_second": 1.255, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 10.357359176256814, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.308236981267365e-05, |
|
"loss": 4.0935, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 10.41792852816475, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.290311015505961e-05, |
|
"loss": 4.1164, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 10.478497880072684, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.272743569059783e-05, |
|
"loss": 4.1843, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 10.539067231980617, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.254996862955992e-05, |
|
"loss": 4.0793, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 10.599636583888552, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.237250156852202e-05, |
|
"loss": 4.0907, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 10.599636583888552, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 158.6593, |
|
"eval_samples_per_second": 19.766, |
|
"eval_steps_per_second": 1.235, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 10.660205935796487, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.219324191090795e-05, |
|
"loss": 4.1483, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 10.720775287704422, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.201577484987004e-05, |
|
"loss": 4.131, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 10.781344639612357, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.183651519225598e-05, |
|
"loss": 4.0796, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 10.841913991520292, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.165725553464193e-05, |
|
"loss": 4.1495, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 10.902483343428225, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.147799587702788e-05, |
|
"loss": 4.0825, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 10.902483343428225, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.0783, |
|
"eval_samples_per_second": 20.092, |
|
"eval_steps_per_second": 1.256, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 10.96305269533616, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.130232141256611e-05, |
|
"loss": 4.1538, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 11.023622047244094, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.11248543515282e-05, |
|
"loss": 4.0415, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 11.08419139915203, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.094738729049027e-05, |
|
"loss": 4.1193, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 11.144760751059964, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.076992022945237e-05, |
|
"loss": 4.1895, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 11.205330102967899, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.05906605718383e-05, |
|
"loss": 4.1094, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 11.205330102967899, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 155.1732, |
|
"eval_samples_per_second": 20.21, |
|
"eval_steps_per_second": 1.263, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 11.265899454875832, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.04131935108004e-05, |
|
"loss": 4.0814, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 11.326468806783767, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.023572644976248e-05, |
|
"loss": 4.1509, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 11.387038158691702, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.005825938872457e-05, |
|
"loss": 4.1404, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 11.447607510599637, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.987899973111052e-05, |
|
"loss": 4.0629, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 11.508176862507572, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.969974007349646e-05, |
|
"loss": 4.0689, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 11.508176862507572, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 159.4675, |
|
"eval_samples_per_second": 19.665, |
|
"eval_steps_per_second": 1.229, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 11.568746214415505, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.952048041588241e-05, |
|
"loss": 4.0654, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 11.62931556632344, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.934122075826835e-05, |
|
"loss": 4.1354, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 11.689884918231375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.91619611006543e-05, |
|
"loss": 4.1234, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 11.75045427013931, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.898270144304025e-05, |
|
"loss": 4.1457, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 11.811023622047244, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.880523438200233e-05, |
|
"loss": 4.0984, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 11.811023622047244, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 154.4705, |
|
"eval_samples_per_second": 20.302, |
|
"eval_steps_per_second": 1.269, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 11.87159297395518, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.862776732096442e-05, |
|
"loss": 4.1671, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 11.932162325863114, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.84503002599265e-05, |
|
"loss": 4.1567, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 11.992731677771047, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.827104060231246e-05, |
|
"loss": 4.0959, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 12.053301029678982, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.80917809446984e-05, |
|
"loss": 4.1034, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 12.113870381586917, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.791252128708434e-05, |
|
"loss": 4.0569, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 12.113870381586917, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.855, |
|
"eval_samples_per_second": 19.993, |
|
"eval_steps_per_second": 1.25, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 12.174439733494852, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.773505422604644e-05, |
|
"loss": 4.1257, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 12.235009085402787, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.755579456843237e-05, |
|
"loss": 4.0668, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 12.29557843731072, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.737653491081832e-05, |
|
"loss": 4.0762, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 12.356147789218655, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.719727525320428e-05, |
|
"loss": 4.164, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 12.41671714112659, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.701980819216635e-05, |
|
"loss": 4.1462, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 12.41671714112659, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.4973, |
|
"eval_samples_per_second": 19.911, |
|
"eval_steps_per_second": 1.244, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 12.477286493034525, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.684234113112844e-05, |
|
"loss": 4.1516, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 12.53785584494246, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.666487407009053e-05, |
|
"loss": 4.1572, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 12.598425196850394, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.648740700905262e-05, |
|
"loss": 4.0986, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 12.658994548758328, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.63099399480147e-05, |
|
"loss": 4.1606, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 12.719563900666262, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.613247288697679e-05, |
|
"loss": 4.1554, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 12.719563900666262, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.4079, |
|
"eval_samples_per_second": 19.923, |
|
"eval_steps_per_second": 1.245, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 12.780133252574197, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.595321322936273e-05, |
|
"loss": 4.0868, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 12.840702604482132, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.577395357174868e-05, |
|
"loss": 4.1136, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 12.901271956390067, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.559469391413463e-05, |
|
"loss": 4.1159, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 12.961841308298002, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.541543425652056e-05, |
|
"loss": 4.0535, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 13.022410660205935, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.52397597920588e-05, |
|
"loss": 4.2207, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 13.022410660205935, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.2944, |
|
"eval_samples_per_second": 20.065, |
|
"eval_steps_per_second": 1.254, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 13.08298001211387, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.506050013444474e-05, |
|
"loss": 4.0931, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 13.143549364021805, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.488303307340683e-05, |
|
"loss": 4.091, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 13.20411871592974, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.470556601236892e-05, |
|
"loss": 4.1211, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 13.264688067837675, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.452630635475486e-05, |
|
"loss": 4.1449, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 13.32525741974561, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.434883929371695e-05, |
|
"loss": 4.1518, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 13.32525741974561, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.1583, |
|
"eval_samples_per_second": 20.082, |
|
"eval_steps_per_second": 1.255, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 13.385826771653543, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.417316482925518e-05, |
|
"loss": 4.1448, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 13.446396123561478, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.399390517164113e-05, |
|
"loss": 4.0868, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 13.506965475469412, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.381464551402707e-05, |
|
"loss": 4.08, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 13.567534827377347, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.363538585641302e-05, |
|
"loss": 4.0581, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 13.628104179285282, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.345791879537511e-05, |
|
"loss": 4.1521, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 13.628104179285282, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.5812, |
|
"eval_samples_per_second": 20.028, |
|
"eval_steps_per_second": 1.252, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 13.688673531193217, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.327865913776105e-05, |
|
"loss": 4.117, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 13.74924288310115, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.309939948014699e-05, |
|
"loss": 4.1053, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 13.809812235009085, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.292013982253295e-05, |
|
"loss": 4.1245, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 13.87038158691702, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.27408801649189e-05, |
|
"loss": 4.1453, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 13.930950938824955, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.256341310388097e-05, |
|
"loss": 4.1367, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 13.930950938824955, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 155.1319, |
|
"eval_samples_per_second": 20.215, |
|
"eval_steps_per_second": 1.263, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 13.99152029073289, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.238415344626693e-05, |
|
"loss": 4.0836, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 14.052089642640825, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.220489378865286e-05, |
|
"loss": 4.1111, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 14.112658994548758, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.202742672761496e-05, |
|
"loss": 4.1495, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 14.173228346456693, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.18481670700009e-05, |
|
"loss": 4.1216, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 14.233797698364627, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.167070000896298e-05, |
|
"loss": 4.0904, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 14.233797698364627, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 158.1109, |
|
"eval_samples_per_second": 19.834, |
|
"eval_steps_per_second": 1.24, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 14.294367050272562, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.149144035134893e-05, |
|
"loss": 4.1318, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 14.354936402180497, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.131576588688716e-05, |
|
"loss": 4.1688, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 14.415505754088432, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.114009142242538e-05, |
|
"loss": 4.163, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 14.476075105996365, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.096083176481133e-05, |
|
"loss": 4.121, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 14.5366444579043, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.078157210719728e-05, |
|
"loss": 4.0813, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 14.5366444579043, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 154.2508, |
|
"eval_samples_per_second": 20.331, |
|
"eval_steps_per_second": 1.271, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 14.597213809812235, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.060231244958322e-05, |
|
"loss": 4.1146, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 14.65778316172017, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.042305279196917e-05, |
|
"loss": 4.1089, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 14.718352513628105, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.0245585730931256e-05, |
|
"loss": 4.1355, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 14.778921865536038, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.0068118669893344e-05, |
|
"loss": 4.1671, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 14.839491217443973, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.9888859012279284e-05, |
|
"loss": 4.1001, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 14.839491217443973, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.2073, |
|
"eval_samples_per_second": 19.948, |
|
"eval_steps_per_second": 1.247, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 14.900060569351908, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.970959935466524e-05, |
|
"loss": 4.0844, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 14.960629921259843, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.953033969705118e-05, |
|
"loss": 4.0888, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 15.021199273167777, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.935108003943712e-05, |
|
"loss": 4.1109, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 15.081768625075712, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.9171820381823075e-05, |
|
"loss": 4.084, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 15.142337976983645, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.899435332078516e-05, |
|
"loss": 4.1333, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 15.142337976983645, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 157.3832, |
|
"eval_samples_per_second": 19.926, |
|
"eval_steps_per_second": 1.245, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 15.20290732889158, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.881688625974725e-05, |
|
"loss": 4.1437, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 15.263476680799515, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.8639419198709333e-05, |
|
"loss": 4.1461, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 15.32404603270745, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.846015954109528e-05, |
|
"loss": 4.1283, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.828089988348122e-05, |
|
"loss": 4.0933, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 15.44518473652332, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.810164022586717e-05, |
|
"loss": 4.0785, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 15.44518473652332, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 155.5837, |
|
"eval_samples_per_second": 20.156, |
|
"eval_steps_per_second": 1.26, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 15.505754088431253, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.792238056825312e-05, |
|
"loss": 4.0693, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 15.566323440339188, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.7744913507215207e-05, |
|
"loss": 4.1328, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 15.626892792247123, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.756744644617729e-05, |
|
"loss": 4.0924, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 15.687462144155058, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.7388186788563234e-05, |
|
"loss": 4.1385, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 15.748031496062993, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.7212512324101464e-05, |
|
"loss": 4.1651, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 15.748031496062993, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 159.7055, |
|
"eval_samples_per_second": 19.636, |
|
"eval_steps_per_second": 1.227, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 15.808600847970927, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.703325266648741e-05, |
|
"loss": 4.1309, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 15.86917019987886, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.685399300887335e-05, |
|
"loss": 4.1694, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 15.929739551786795, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.66747333512593e-05, |
|
"loss": 4.0989, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 15.99030890369473, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.6497266290221385e-05, |
|
"loss": 4.1099, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 16.050878255602665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.631800663260733e-05, |
|
"loss": 4.0987, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 16.050878255602665, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.236, |
|
"eval_samples_per_second": 20.072, |
|
"eval_steps_per_second": 1.255, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 16.1114476075106, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.614053957156942e-05, |
|
"loss": 4.1633, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 16.172016959418535, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.5963072510531514e-05, |
|
"loss": 4.1029, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 16.232586311326468, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.5785605449493595e-05, |
|
"loss": 4.1551, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 16.293155663234405, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.560634579187954e-05, |
|
"loss": 4.0991, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 16.353725015142338, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.542708613426548e-05, |
|
"loss": 4.1327, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 16.353725015142338, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.6857, |
|
"eval_samples_per_second": 20.015, |
|
"eval_steps_per_second": 1.251, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 16.41429436705027, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.5249619073227576e-05, |
|
"loss": 4.1288, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 16.474863718958208, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.5070359415613516e-05, |
|
"loss": 4.1022, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 16.53543307086614, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.489289235457561e-05, |
|
"loss": 4.1601, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 16.596002422774077, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.471363269696155e-05, |
|
"loss": 4.1122, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 16.65657177468201, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.4536165635923645e-05, |
|
"loss": 4.1128, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 16.65657177468201, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.7138, |
|
"eval_samples_per_second": 20.011, |
|
"eval_steps_per_second": 1.251, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 16.717141126589944, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.435690597830958e-05, |
|
"loss": 4.1182, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 16.77771047849788, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.417764632069553e-05, |
|
"loss": 4.09, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 16.838279830405813, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.399838666308148e-05, |
|
"loss": 4.0946, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 16.89884918231375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.3819127005467416e-05, |
|
"loss": 4.0853, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 16.959418534221683, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.364345254100565e-05, |
|
"loss": 4.0694, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 16.959418534221683, |
|
"eval_cer": 0.8919898495106757, |
|
"eval_loss": 4.137392520904541, |
|
"eval_runtime": 156.949, |
|
"eval_samples_per_second": 19.981, |
|
"eval_steps_per_second": 1.249, |
|
"eval_wer": 0.9964900151567527, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 17.01998788612962, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.346598547996774e-05, |
|
"loss": 4.1954, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 17.080557238037553, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.328851841892982e-05, |
|
"loss": 4.1889, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 17.141126589945486, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.310925876131577e-05, |
|
"loss": 4.0488, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 17.201695941853423, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.292999910370171e-05, |
|
"loss": 4.0962, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 17.262265293761356, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 5.946, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 17.262265293761356, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.5364, |
|
"eval_samples_per_second": 19.781, |
|
"eval_steps_per_second": 1.236, |
|
"eval_wer": 1.0, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 17.322834645669293, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 17.383403997577226, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 17.44397334948516, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 17.504542701393095, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 17.56511205330103, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 17.56511205330103, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 155.6448, |
|
"eval_samples_per_second": 20.148, |
|
"eval_steps_per_second": 1.259, |
|
"eval_wer": 1.0, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 17.625681405208965, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 17.6862507571169, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 17.746820109024835, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 17.807389460932768, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 17.8679588128407, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 17.8679588128407, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.5604, |
|
"eval_samples_per_second": 19.778, |
|
"eval_steps_per_second": 1.236, |
|
"eval_wer": 1.0, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 17.928528164748638, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 17.98909751665657, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 18.049666868564508, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 18.11023622047244, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 18.170805572380374, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 18.170805572380374, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 154.6546, |
|
"eval_samples_per_second": 20.277, |
|
"eval_steps_per_second": 1.267, |
|
"eval_wer": 1.0, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 18.23137492428831, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 18.291944276196244, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 18.35251362810418, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 18.413082980012113, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 18.47365233192005, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 18.47365233192005, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.9964, |
|
"eval_samples_per_second": 19.975, |
|
"eval_steps_per_second": 1.248, |
|
"eval_wer": 1.0, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 18.534221683827983, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 18.594791035735916, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 18.655360387643853, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 18.715929739551786, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 18.776499091459723, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 18.776499091459723, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 159.577, |
|
"eval_samples_per_second": 19.652, |
|
"eval_steps_per_second": 1.228, |
|
"eval_wer": 1.0, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 18.837068443367656, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 18.89763779527559, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 18.958207147183526, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 19.01877649909146, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 19.079345850999395, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 19.079345850999395, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 154.8715, |
|
"eval_samples_per_second": 20.249, |
|
"eval_steps_per_second": 1.266, |
|
"eval_wer": 1.0, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 19.13991520290733, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 19.200484554815265, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 19.2610539067232, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 19.32162325863113, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 19.382192610539068, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 19.382192610539068, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.5535, |
|
"eval_samples_per_second": 19.904, |
|
"eval_steps_per_second": 1.244, |
|
"eval_wer": 1.0, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 19.442761962447, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 19.503331314354938, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 19.56390066626287, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 19.624470018170804, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 19.68503937007874, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 19.68503937007874, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.5949, |
|
"eval_samples_per_second": 20.026, |
|
"eval_steps_per_second": 1.252, |
|
"eval_wer": 1.0, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 19.745608721986674, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 19.80617807389461, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 19.866747425802544, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 19.92731677771048, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 19.987886129618413, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 19.987886129618413, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.5471, |
|
"eval_samples_per_second": 19.78, |
|
"eval_steps_per_second": 1.236, |
|
"eval_wer": 1.0, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 20.048455481526346, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 20.109024833434283, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 20.169594185342216, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 20.230163537250153, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 20.290732889158086, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 20.290732889158086, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 155.0807, |
|
"eval_samples_per_second": 20.222, |
|
"eval_steps_per_second": 1.264, |
|
"eval_wer": 1.0, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 20.35130224106602, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 20.411871592973956, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 20.47244094488189, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 20.533010296789826, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 20.59357964869776, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 20.59357964869776, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.3553, |
|
"eval_samples_per_second": 20.057, |
|
"eval_steps_per_second": 1.254, |
|
"eval_wer": 1.0, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 20.654149000605692, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 20.71471835251363, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 20.77528770442156, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 20.8358570563295, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 20.89642640823743, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 20.89642640823743, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 159.3794, |
|
"eval_samples_per_second": 19.676, |
|
"eval_steps_per_second": 1.23, |
|
"eval_wer": 1.0, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 20.956995760145368, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 21.0175651120533, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 21.078134463961234, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 21.13870381586917, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 21.199273167777104, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 21.199273167777104, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 154.6365, |
|
"eval_samples_per_second": 20.28, |
|
"eval_steps_per_second": 1.267, |
|
"eval_wer": 1.0, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 21.25984251968504, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 21.320411871592974, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 21.380981223500907, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 21.441550575408844, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 21.502119927316777, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 21.502119927316777, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 165.5303, |
|
"eval_samples_per_second": 18.945, |
|
"eval_steps_per_second": 1.184, |
|
"eval_wer": 1.0, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 21.562689279224713, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 21.623258631132646, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 21.683827983040583, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 21.744397334948516, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 21.80496668685645, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 21.80496668685645, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.2442, |
|
"eval_samples_per_second": 19.944, |
|
"eval_steps_per_second": 1.246, |
|
"eval_wer": 1.0, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 21.865536038764386, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 21.92610539067232, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 21.986674742580256, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 22.04724409448819, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 22.107813446396122, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 22.107813446396122, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 155.4803, |
|
"eval_samples_per_second": 20.17, |
|
"eval_steps_per_second": 1.261, |
|
"eval_wer": 1.0, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 22.16838279830406, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 22.228952150211992, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 22.28952150211993, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 22.35009085402786, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 22.410660205935798, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 22.410660205935798, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.7983, |
|
"eval_samples_per_second": 19.748, |
|
"eval_steps_per_second": 1.234, |
|
"eval_wer": 1.0, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 22.47122955784373, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 22.531798909751664, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 22.5923682616596, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 22.652937613567534, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 22.71350696547547, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 22.71350696547547, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.0134, |
|
"eval_samples_per_second": 20.101, |
|
"eval_steps_per_second": 1.256, |
|
"eval_wer": 1.0, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 22.774076317383404, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 22.834645669291337, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 22.895215021199274, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 22.955784373107207, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 23.016353725015144, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 23.016353725015144, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.6663, |
|
"eval_samples_per_second": 19.765, |
|
"eval_steps_per_second": 1.235, |
|
"eval_wer": 1.0, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 23.076923076923077, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 23.13749242883101, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 23.198061780738946, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 23.25863113264688, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 23.319200484554816, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 23.319200484554816, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 159.6009, |
|
"eval_samples_per_second": 19.649, |
|
"eval_steps_per_second": 1.228, |
|
"eval_wer": 1.0, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 23.37976983646275, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 23.440339188370686, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 23.50090854027862, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 23.561477892186552, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 23.62204724409449, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 23.62204724409449, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.326, |
|
"eval_samples_per_second": 20.061, |
|
"eval_steps_per_second": 1.254, |
|
"eval_wer": 1.0, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 23.682616596002422, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 23.74318594791036, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 23.80375529981829, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 23.864324651726225, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 23.92489400363416, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 23.92489400363416, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 161.8953, |
|
"eval_samples_per_second": 19.371, |
|
"eval_steps_per_second": 1.211, |
|
"eval_wer": 1.0, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 23.985463355542095, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 24.04603270745003, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 24.106602059357964, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 24.1671714112659, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 24.227740763173834, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 24.227740763173834, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 154.7401, |
|
"eval_samples_per_second": 20.266, |
|
"eval_steps_per_second": 1.267, |
|
"eval_wer": 1.0, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 24.288310115081767, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 24.348879466989704, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 24.409448818897637, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 24.470018170805574, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 24.530587522713507, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 24.530587522713507, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 159.4245, |
|
"eval_samples_per_second": 19.671, |
|
"eval_steps_per_second": 1.229, |
|
"eval_wer": 1.0, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 24.59115687462144, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 24.651726226529377, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 24.71229557843731, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 24.772864930345246, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 24.83343428225318, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 24.83343428225318, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 155.962, |
|
"eval_samples_per_second": 20.107, |
|
"eval_steps_per_second": 1.257, |
|
"eval_wer": 1.0, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 24.894003634161116, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 24.95457298606905, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 25.015142337976982, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 25.07571168988492, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 25.136281041792852, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 25.136281041792852, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.4004, |
|
"eval_samples_per_second": 20.051, |
|
"eval_steps_per_second": 1.253, |
|
"eval_wer": 1.0, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 25.19685039370079, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 25.257419745608722, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 25.317989097516655, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 25.37855844942459, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 25.439127801332525, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 25.439127801332525, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.9915, |
|
"eval_samples_per_second": 19.849, |
|
"eval_steps_per_second": 1.241, |
|
"eval_wer": 1.0, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 25.49969715324046, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 25.560266505148395, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 25.620835857056328, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 25.681405208964264, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 25.741974560872197, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 25.741974560872197, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 161.4679, |
|
"eval_samples_per_second": 19.422, |
|
"eval_steps_per_second": 1.214, |
|
"eval_wer": 1.0, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 25.802543912780134, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 25.863113264688067, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 25.923682616596004, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 25.984251968503937, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 26.04482132041187, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 26.04482132041187, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 155.5452, |
|
"eval_samples_per_second": 20.161, |
|
"eval_steps_per_second": 1.26, |
|
"eval_wer": 1.0, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 26.105390672319807, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 26.16596002422774, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 26.226529376135677, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 26.28709872804361, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 26.347668079951543, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 26.347668079951543, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 160.9947, |
|
"eval_samples_per_second": 19.479, |
|
"eval_steps_per_second": 1.217, |
|
"eval_wer": 1.0, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 26.40823743185948, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 26.468806783767413, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 26.52937613567535, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 26.589945487583282, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 26.65051483949122, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 26.65051483949122, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.8158, |
|
"eval_samples_per_second": 19.871, |
|
"eval_steps_per_second": 1.242, |
|
"eval_wer": 1.0, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 26.711084191399152, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 26.771653543307085, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 26.832222895215022, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 26.892792247122955, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 26.95336159903089, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 26.95336159903089, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.1889, |
|
"eval_samples_per_second": 20.078, |
|
"eval_steps_per_second": 1.255, |
|
"eval_wer": 1.0, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 27.013930950938825, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 27.074500302846758, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 27.135069654754695, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 27.195639006662628, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 27.256208358570564, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 27.256208358570564, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.4428, |
|
"eval_samples_per_second": 20.046, |
|
"eval_steps_per_second": 1.253, |
|
"eval_wer": 1.0, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 27.316777710478497, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 27.377347062386434, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 27.437916414294367, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 27.4984857662023, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 27.559055118110237, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 27.559055118110237, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 162.8125, |
|
"eval_samples_per_second": 19.261, |
|
"eval_steps_per_second": 1.204, |
|
"eval_wer": 1.0, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 27.61962447001817, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 27.680193821926107, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 27.74076317383404, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 27.801332525741973, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 27.86190187764991, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 27.86190187764991, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 167.9233, |
|
"eval_samples_per_second": 18.675, |
|
"eval_steps_per_second": 1.167, |
|
"eval_wer": 1.0, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 27.922471229557843, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 27.98304058146578, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 28.043609933373713, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 28.10417928528165, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 28.164748637189582, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 28.164748637189582, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.9935, |
|
"eval_samples_per_second": 19.724, |
|
"eval_steps_per_second": 1.233, |
|
"eval_wer": 1.0, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 28.225317989097515, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 28.285887341005452, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 28.346456692913385, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 28.407026044821322, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 28.467595396729255, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 28.467595396729255, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.4249, |
|
"eval_samples_per_second": 20.048, |
|
"eval_steps_per_second": 1.253, |
|
"eval_wer": 1.0, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 28.528164748637188, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 28.588734100545125, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 28.649303452453058, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 28.709872804360995, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 28.770442156268928, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 28.770442156268928, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 160.7633, |
|
"eval_samples_per_second": 19.507, |
|
"eval_steps_per_second": 1.219, |
|
"eval_wer": 1.0, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 28.831011508176864, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 28.891580860084797, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 28.95215021199273, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 29.012719563900667, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 29.0732889158086, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 29.0732889158086, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.7953, |
|
"eval_samples_per_second": 20.001, |
|
"eval_steps_per_second": 1.25, |
|
"eval_wer": 1.0, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 29.133858267716537, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 29.19442761962447, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 29.254996971532403, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 29.31556632344034, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 29.376135675348273, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 29.376135675348273, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.9582, |
|
"eval_samples_per_second": 19.98, |
|
"eval_steps_per_second": 1.249, |
|
"eval_wer": 1.0, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 29.43670502725621, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 29.497274379164143, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 29.557843731072076, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 29.618413082980013, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 29.678982434887946, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 29.678982434887946, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.9824, |
|
"eval_samples_per_second": 19.725, |
|
"eval_steps_per_second": 1.233, |
|
"eval_wer": 1.0, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 29.739551786795882, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 29.800121138703815, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 29.860690490611752, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 29.921259842519685, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 29.98182919442762, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 29.98182919442762, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.4338, |
|
"eval_samples_per_second": 19.794, |
|
"eval_steps_per_second": 1.237, |
|
"eval_wer": 1.0, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 30.042398546335555, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 30.102967898243488, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 30.163537250151425, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 30.224106602059358, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 30.28467595396729, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 30.28467595396729, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 158.7285, |
|
"eval_samples_per_second": 19.757, |
|
"eval_steps_per_second": 1.235, |
|
"eval_wer": 1.0, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 30.345245305875228, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 30.40581465778316, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 30.466384009691097, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 30.52695336159903, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 30.587522713506967, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 30.587522713506967, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.467, |
|
"eval_samples_per_second": 19.915, |
|
"eval_steps_per_second": 1.245, |
|
"eval_wer": 1.0, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 30.6480920654149, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 30.708661417322833, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 30.829800121138703, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 30.89036947304664, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 30.89036947304664, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 164.5577, |
|
"eval_samples_per_second": 19.057, |
|
"eval_steps_per_second": 1.191, |
|
"eval_wer": 1.0, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 30.950938824954573, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 31.011508176862506, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 31.072077528770443, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 31.132646880678376, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 31.193216232586312, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 31.193216232586312, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.8522, |
|
"eval_samples_per_second": 19.867, |
|
"eval_steps_per_second": 1.242, |
|
"eval_wer": 1.0, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 31.253785584494246, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 31.314354936402182, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 31.374924288310115, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 31.43549364021805, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 31.496062992125985, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 31.496062992125985, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.1362, |
|
"eval_samples_per_second": 20.085, |
|
"eval_steps_per_second": 1.255, |
|
"eval_wer": 1.0, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 31.55663234403392, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 31.617201695941855, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 31.677771047849788, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 31.73834039975772, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 31.798909751665658, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 31.798909751665658, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 163.2532, |
|
"eval_samples_per_second": 19.209, |
|
"eval_steps_per_second": 1.201, |
|
"eval_wer": 1.0, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 31.85947910357359, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 31.920048455481528, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 31.98061780738946, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 32.041187159297394, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 32.10175651120533, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 32.10175651120533, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.1574, |
|
"eval_samples_per_second": 19.955, |
|
"eval_steps_per_second": 1.247, |
|
"eval_wer": 1.0, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 32.16232586311327, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 32.2228952150212, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 32.28346456692913, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 32.34403391883707, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 32.404603270745, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 32.404603270745, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.2934, |
|
"eval_samples_per_second": 19.937, |
|
"eval_steps_per_second": 1.246, |
|
"eval_wer": 1.0, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 32.465172622652936, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 32.52574197456087, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 32.58631132646881, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 32.64688067837674, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 32.707450030284676, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 32.707450030284676, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.1352, |
|
"eval_samples_per_second": 19.957, |
|
"eval_steps_per_second": 1.247, |
|
"eval_wer": 1.0, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 32.76801938219261, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 32.82858873410054, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 32.88915808600848, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 32.949727437916415, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 33.01029678982435, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 33.01029678982435, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 159.5913, |
|
"eval_samples_per_second": 19.65, |
|
"eval_steps_per_second": 1.228, |
|
"eval_wer": 1.0, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 33.07086614173228, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 33.13143549364022, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 33.192004845548155, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 33.252574197456084, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 33.31314354936402, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 33.31314354936402, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.6924, |
|
"eval_samples_per_second": 20.014, |
|
"eval_steps_per_second": 1.251, |
|
"eval_wer": 1.0, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 33.37371290127196, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 33.434282253179894, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 33.494851605087824, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 33.55542095699576, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 33.6159903089037, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 33.6159903089037, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 159.9698, |
|
"eval_samples_per_second": 19.604, |
|
"eval_steps_per_second": 1.225, |
|
"eval_wer": 1.0, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 33.67655966081163, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 33.73712901271956, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 33.7976983646275, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 33.85826771653543, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 33.918837068443366, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 33.918837068443366, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 160.7849, |
|
"eval_samples_per_second": 19.504, |
|
"eval_steps_per_second": 1.219, |
|
"eval_wer": 1.0, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 33.9794064203513, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 34.03997577225924, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 34.10054512416717, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 34.161114476075106, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 34.22168382798304, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 34.22168382798304, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 156.7572, |
|
"eval_samples_per_second": 20.005, |
|
"eval_steps_per_second": 1.25, |
|
"eval_wer": 1.0, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 34.28225317989097, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 34.34282253179891, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 34.403391883706846, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 34.46396123561478, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 34.52453058752271, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 34.52453058752271, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.0934, |
|
"eval_samples_per_second": 19.963, |
|
"eval_steps_per_second": 1.248, |
|
"eval_wer": 1.0, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 34.58509993943065, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 34.645669291338585, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 34.706238643246515, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 34.76680799515445, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 34.82737734706239, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 34.82737734706239, |
|
"eval_cer": 1.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 157.6451, |
|
"eval_samples_per_second": 19.893, |
|
"eval_steps_per_second": 1.243, |
|
"eval_wer": 1.0, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 34.88794669897032, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 34.948516050878254, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.281168772967644e-05, |
|
"loss": 0.0, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"step": 57785, |
|
"total_flos": 3.5484642655073436e+20, |
|
"train_loss": 2.001046013029593, |
|
"train_runtime": 176741.4087, |
|
"train_samples_per_second": 10.461, |
|
"train_steps_per_second": 0.327 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 57785, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 35, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.5484642655073436e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|