{ "best_global_step": 7500, "best_metric": 8.595293694988323, "best_model_checkpoint": "./checkpoint-7500", "epoch": 2.300705882352941, "eval_steps": 500, "global_step": 8500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000588235294117647, "grad_norm": 9.210806846618652, "learning_rate": 3.2e-08, "loss": 0.66, "step": 5 }, { "epoch": 0.001176470588235294, "grad_norm": 7.5304341316223145, "learning_rate": 7.2e-08, "loss": 0.5352, "step": 10 }, { "epoch": 0.0017647058823529412, "grad_norm": 6.866551876068115, "learning_rate": 1.12e-07, "loss": 0.5319, "step": 15 }, { "epoch": 0.002352941176470588, "grad_norm": 8.664909362792969, "learning_rate": 1.5199999999999998e-07, "loss": 0.5559, "step": 20 }, { "epoch": 0.0029411764705882353, "grad_norm": 8.310157775878906, "learning_rate": 1.92e-07, "loss": 0.5181, "step": 25 }, { "epoch": 0.0035294117647058825, "grad_norm": 7.670085906982422, "learning_rate": 2.32e-07, "loss": 0.5433, "step": 30 }, { "epoch": 0.00411764705882353, "grad_norm": 9.368919372558594, "learning_rate": 2.72e-07, "loss": 0.4754, "step": 35 }, { "epoch": 0.004705882352941176, "grad_norm": 8.820810317993164, "learning_rate": 3.12e-07, "loss": 0.4865, "step": 40 }, { "epoch": 0.005294117647058823, "grad_norm": 11.132355690002441, "learning_rate": 3.52e-07, "loss": 0.4226, "step": 45 }, { "epoch": 0.0058823529411764705, "grad_norm": 10.19275188446045, "learning_rate": 3.92e-07, "loss": 0.3532, "step": 50 }, { "epoch": 0.006470588235294118, "grad_norm": 7.569909572601318, "learning_rate": 4.3199999999999995e-07, "loss": 0.3074, "step": 55 }, { "epoch": 0.007058823529411765, "grad_norm": 8.587510108947754, "learning_rate": 4.7199999999999994e-07, "loss": 0.3602, "step": 60 }, { "epoch": 0.007647058823529412, "grad_norm": 7.001413345336914, "learning_rate": 5.12e-07, "loss": 0.2923, "step": 65 }, { "epoch": 0.00823529411764706, "grad_norm": 6.358983993530273, "learning_rate": 5.520000000000001e-07, "loss": 0.2817, "step": 70 }, { "epoch": 0.008823529411764706, "grad_norm": 6.702334880828857, "learning_rate": 5.919999999999999e-07, "loss": 0.2965, "step": 75 }, { "epoch": 0.009411764705882352, "grad_norm": 6.38828182220459, "learning_rate": 6.319999999999999e-07, "loss": 0.2909, "step": 80 }, { "epoch": 0.01, "grad_norm": 7.688418865203857, "learning_rate": 6.72e-07, "loss": 0.2558, "step": 85 }, { "epoch": 0.010588235294117647, "grad_norm": 6.000365734100342, "learning_rate": 7.119999999999999e-07, "loss": 0.2538, "step": 90 }, { "epoch": 0.011176470588235295, "grad_norm": 7.743271350860596, "learning_rate": 7.52e-07, "loss": 0.289, "step": 95 }, { "epoch": 0.011764705882352941, "grad_norm": 5.441481590270996, "learning_rate": 7.92e-07, "loss": 0.2755, "step": 100 }, { "epoch": 0.012352941176470587, "grad_norm": 4.611705303192139, "learning_rate": 8.319999999999999e-07, "loss": 0.2601, "step": 105 }, { "epoch": 0.012941176470588235, "grad_norm": 5.385910511016846, "learning_rate": 8.72e-07, "loss": 0.2331, "step": 110 }, { "epoch": 0.013529411764705882, "grad_norm": 6.312107086181641, "learning_rate": 9.12e-07, "loss": 0.2367, "step": 115 }, { "epoch": 0.01411764705882353, "grad_norm": 5.791952133178711, "learning_rate": 9.52e-07, "loss": 0.2815, "step": 120 }, { "epoch": 0.014705882352941176, "grad_norm": 4.852151870727539, "learning_rate": 9.92e-07, "loss": 0.2801, "step": 125 }, { "epoch": 0.015294117647058824, "grad_norm": 5.936627388000488, "learning_rate": 1.032e-06, "loss": 0.3376, "step": 130 }, { "epoch": 0.01588235294117647, "grad_norm": 5.612387180328369, "learning_rate": 1.072e-06, "loss": 0.3409, "step": 135 }, { "epoch": 0.01647058823529412, "grad_norm": 3.4502546787261963, "learning_rate": 1.1120000000000001e-06, "loss": 0.2587, "step": 140 }, { "epoch": 0.017058823529411765, "grad_norm": 5.075406074523926, "learning_rate": 1.152e-06, "loss": 0.304, "step": 145 }, { "epoch": 0.01764705882352941, "grad_norm": 3.6106865406036377, "learning_rate": 1.192e-06, "loss": 0.2361, "step": 150 }, { "epoch": 0.018235294117647058, "grad_norm": 4.649672508239746, "learning_rate": 1.232e-06, "loss": 0.2309, "step": 155 }, { "epoch": 0.018823529411764704, "grad_norm": 4.335810661315918, "learning_rate": 1.272e-06, "loss": 0.2219, "step": 160 }, { "epoch": 0.019411764705882354, "grad_norm": 3.526329755783081, "learning_rate": 1.312e-06, "loss": 0.2507, "step": 165 }, { "epoch": 0.02, "grad_norm": 4.524788856506348, "learning_rate": 1.352e-06, "loss": 0.2713, "step": 170 }, { "epoch": 0.020588235294117647, "grad_norm": 3.9998507499694824, "learning_rate": 1.3919999999999998e-06, "loss": 0.2439, "step": 175 }, { "epoch": 0.021176470588235293, "grad_norm": 4.73967170715332, "learning_rate": 1.4319999999999998e-06, "loss": 0.2429, "step": 180 }, { "epoch": 0.02176470588235294, "grad_norm": 5.148312568664551, "learning_rate": 1.4719999999999998e-06, "loss": 0.2473, "step": 185 }, { "epoch": 0.02235294117647059, "grad_norm": 6.347844123840332, "learning_rate": 1.5119999999999999e-06, "loss": 0.2389, "step": 190 }, { "epoch": 0.022941176470588236, "grad_norm": 4.1194939613342285, "learning_rate": 1.552e-06, "loss": 0.2234, "step": 195 }, { "epoch": 0.023529411764705882, "grad_norm": 4.192361354827881, "learning_rate": 1.592e-06, "loss": 0.2163, "step": 200 }, { "epoch": 0.02411764705882353, "grad_norm": 3.418804407119751, "learning_rate": 1.6319999999999998e-06, "loss": 0.2002, "step": 205 }, { "epoch": 0.024705882352941175, "grad_norm": 4.223726749420166, "learning_rate": 1.6719999999999998e-06, "loss": 0.2377, "step": 210 }, { "epoch": 0.025294117647058825, "grad_norm": 4.812453746795654, "learning_rate": 1.7119999999999999e-06, "loss": 0.1894, "step": 215 }, { "epoch": 0.02588235294117647, "grad_norm": 3.7552876472473145, "learning_rate": 1.752e-06, "loss": 0.2188, "step": 220 }, { "epoch": 0.026470588235294117, "grad_norm": 4.382972240447998, "learning_rate": 1.792e-06, "loss": 0.1904, "step": 225 }, { "epoch": 0.027058823529411764, "grad_norm": 4.380246639251709, "learning_rate": 1.832e-06, "loss": 0.2293, "step": 230 }, { "epoch": 0.027647058823529413, "grad_norm": 3.448075294494629, "learning_rate": 1.872e-06, "loss": 0.2033, "step": 235 }, { "epoch": 0.02823529411764706, "grad_norm": 3.4712142944335938, "learning_rate": 1.9119999999999997e-06, "loss": 0.1602, "step": 240 }, { "epoch": 0.028823529411764706, "grad_norm": 5.18366813659668, "learning_rate": 1.9519999999999997e-06, "loss": 0.2001, "step": 245 }, { "epoch": 0.029411764705882353, "grad_norm": 6.266733646392822, "learning_rate": 1.9919999999999997e-06, "loss": 0.2077, "step": 250 }, { "epoch": 0.03, "grad_norm": 4.826927185058594, "learning_rate": 2.0319999999999998e-06, "loss": 0.2299, "step": 255 }, { "epoch": 0.03058823529411765, "grad_norm": 5.940469264984131, "learning_rate": 2.072e-06, "loss": 0.2306, "step": 260 }, { "epoch": 0.031176470588235295, "grad_norm": 5.03666877746582, "learning_rate": 2.112e-06, "loss": 0.2429, "step": 265 }, { "epoch": 0.03176470588235294, "grad_norm": 6.458746433258057, "learning_rate": 2.152e-06, "loss": 0.2395, "step": 270 }, { "epoch": 0.03235294117647059, "grad_norm": 5.837243556976318, "learning_rate": 2.192e-06, "loss": 0.2176, "step": 275 }, { "epoch": 0.03294117647058824, "grad_norm": 4.788494110107422, "learning_rate": 2.232e-06, "loss": 0.2303, "step": 280 }, { "epoch": 0.033529411764705884, "grad_norm": 5.0417375564575195, "learning_rate": 2.2719999999999996e-06, "loss": 0.2302, "step": 285 }, { "epoch": 0.03411764705882353, "grad_norm": 5.684013843536377, "learning_rate": 2.3119999999999996e-06, "loss": 0.2238, "step": 290 }, { "epoch": 0.03470588235294118, "grad_norm": 6.517688751220703, "learning_rate": 2.3519999999999997e-06, "loss": 0.2334, "step": 295 }, { "epoch": 0.03529411764705882, "grad_norm": 6.103775501251221, "learning_rate": 2.3919999999999997e-06, "loss": 0.2469, "step": 300 }, { "epoch": 0.03588235294117647, "grad_norm": 5.707073211669922, "learning_rate": 2.4319999999999998e-06, "loss": 0.2514, "step": 305 }, { "epoch": 0.036470588235294116, "grad_norm": 5.96298360824585, "learning_rate": 2.472e-06, "loss": 0.3873, "step": 310 }, { "epoch": 0.03705882352941176, "grad_norm": 7.544048309326172, "learning_rate": 2.512e-06, "loss": 0.4071, "step": 315 }, { "epoch": 0.03764705882352941, "grad_norm": 4.868683815002441, "learning_rate": 2.552e-06, "loss": 0.384, "step": 320 }, { "epoch": 0.03823529411764706, "grad_norm": 4.617959976196289, "learning_rate": 2.592e-06, "loss": 0.4013, "step": 325 }, { "epoch": 0.03882352941176471, "grad_norm": 4.733787536621094, "learning_rate": 2.632e-06, "loss": 0.4399, "step": 330 }, { "epoch": 0.039411764705882354, "grad_norm": 4.569477081298828, "learning_rate": 2.672e-06, "loss": 0.4483, "step": 335 }, { "epoch": 0.04, "grad_norm": 4.225634574890137, "learning_rate": 2.712e-06, "loss": 0.3696, "step": 340 }, { "epoch": 0.04058823529411765, "grad_norm": 5.4749908447265625, "learning_rate": 2.7519999999999997e-06, "loss": 0.4565, "step": 345 }, { "epoch": 0.041176470588235294, "grad_norm": 4.12764835357666, "learning_rate": 2.7919999999999997e-06, "loss": 0.4195, "step": 350 }, { "epoch": 0.04176470588235294, "grad_norm": 4.8739447593688965, "learning_rate": 2.8319999999999997e-06, "loss": 0.3572, "step": 355 }, { "epoch": 0.042352941176470586, "grad_norm": 5.095338821411133, "learning_rate": 2.8719999999999998e-06, "loss": 0.4048, "step": 360 }, { "epoch": 0.04294117647058823, "grad_norm": 4.831172466278076, "learning_rate": 2.912e-06, "loss": 0.3647, "step": 365 }, { "epoch": 0.04352941176470588, "grad_norm": 4.184713840484619, "learning_rate": 2.952e-06, "loss": 0.3425, "step": 370 }, { "epoch": 0.04411764705882353, "grad_norm": 5.462978839874268, "learning_rate": 2.992e-06, "loss": 0.3083, "step": 375 }, { "epoch": 0.04470588235294118, "grad_norm": 4.592141628265381, "learning_rate": 3.032e-06, "loss": 0.2794, "step": 380 }, { "epoch": 0.045294117647058825, "grad_norm": 4.346080303192139, "learning_rate": 3.072e-06, "loss": 0.2465, "step": 385 }, { "epoch": 0.04588235294117647, "grad_norm": 3.630915641784668, "learning_rate": 3.112e-06, "loss": 0.1872, "step": 390 }, { "epoch": 0.04647058823529412, "grad_norm": 4.339008808135986, "learning_rate": 3.152e-06, "loss": 0.211, "step": 395 }, { "epoch": 0.047058823529411764, "grad_norm": 4.775891304016113, "learning_rate": 3.192e-06, "loss": 0.2407, "step": 400 }, { "epoch": 0.04764705882352941, "grad_norm": 4.156243801116943, "learning_rate": 3.232e-06, "loss": 0.2395, "step": 405 }, { "epoch": 0.04823529411764706, "grad_norm": 5.370198726654053, "learning_rate": 3.2719999999999998e-06, "loss": 0.3505, "step": 410 }, { "epoch": 0.0488235294117647, "grad_norm": 4.624802589416504, "learning_rate": 3.312e-06, "loss": 0.3193, "step": 415 }, { "epoch": 0.04941176470588235, "grad_norm": 4.849734306335449, "learning_rate": 3.352e-06, "loss": 0.2446, "step": 420 }, { "epoch": 0.05, "grad_norm": 5.265301704406738, "learning_rate": 3.392e-06, "loss": 0.2965, "step": 425 }, { "epoch": 0.05058823529411765, "grad_norm": 4.785882949829102, "learning_rate": 3.432e-06, "loss": 0.2696, "step": 430 }, { "epoch": 0.051176470588235295, "grad_norm": 4.0059332847595215, "learning_rate": 3.472e-06, "loss": 0.3235, "step": 435 }, { "epoch": 0.05176470588235294, "grad_norm": 4.41887903213501, "learning_rate": 3.512e-06, "loss": 0.4004, "step": 440 }, { "epoch": 0.05235294117647059, "grad_norm": 5.460892677307129, "learning_rate": 3.552e-06, "loss": 0.3894, "step": 445 }, { "epoch": 0.052941176470588235, "grad_norm": 4.12983512878418, "learning_rate": 3.592e-06, "loss": 0.2938, "step": 450 }, { "epoch": 0.05352941176470588, "grad_norm": 4.574638843536377, "learning_rate": 3.632e-06, "loss": 0.3518, "step": 455 }, { "epoch": 0.05411764705882353, "grad_norm": 5.826401710510254, "learning_rate": 3.672e-06, "loss": 0.4808, "step": 460 }, { "epoch": 0.054705882352941174, "grad_norm": 5.214783668518066, "learning_rate": 3.712e-06, "loss": 0.4049, "step": 465 }, { "epoch": 0.05529411764705883, "grad_norm": 4.931481838226318, "learning_rate": 3.7519999999999994e-06, "loss": 0.4363, "step": 470 }, { "epoch": 0.05588235294117647, "grad_norm": 5.0569539070129395, "learning_rate": 3.7919999999999994e-06, "loss": 0.3297, "step": 475 }, { "epoch": 0.05647058823529412, "grad_norm": 4.543420314788818, "learning_rate": 3.832e-06, "loss": 0.3466, "step": 480 }, { "epoch": 0.057058823529411766, "grad_norm": 4.1955180168151855, "learning_rate": 3.8719999999999995e-06, "loss": 0.3346, "step": 485 }, { "epoch": 0.05764705882352941, "grad_norm": 5.060678482055664, "learning_rate": 3.912e-06, "loss": 0.4337, "step": 490 }, { "epoch": 0.05823529411764706, "grad_norm": 4.680274963378906, "learning_rate": 3.952e-06, "loss": 0.3525, "step": 495 }, { "epoch": 0.058823529411764705, "grad_norm": 5.270072937011719, "learning_rate": 3.992e-06, "loss": 0.3163, "step": 500 }, { "epoch": 0.058823529411764705, "eval_loss": 0.2426382452249527, "eval_runtime": 217.6364, "eval_samples_per_second": 8.101, "eval_steps_per_second": 2.026, "eval_wer": 14.12789653314173, "step": 500 }, { "epoch": 0.05941176470588235, "grad_norm": 6.508598327636719, "learning_rate": 3.998e-06, "loss": 0.387, "step": 505 }, { "epoch": 0.06, "grad_norm": 7.0637688636779785, "learning_rate": 3.995499999999999e-06, "loss": 0.3058, "step": 510 }, { "epoch": 0.060588235294117644, "grad_norm": 5.739589214324951, "learning_rate": 3.993e-06, "loss": 0.296, "step": 515 }, { "epoch": 0.0611764705882353, "grad_norm": 4.647040367126465, "learning_rate": 3.9905e-06, "loss": 0.3147, "step": 520 }, { "epoch": 0.061764705882352944, "grad_norm": 5.974977970123291, "learning_rate": 3.988e-06, "loss": 0.3188, "step": 525 }, { "epoch": 0.06235294117647059, "grad_norm": 6.136227130889893, "learning_rate": 3.9855e-06, "loss": 0.2604, "step": 530 }, { "epoch": 0.06294117647058824, "grad_norm": 6.183608055114746, "learning_rate": 3.983e-06, "loss": 0.2991, "step": 535 }, { "epoch": 0.06352941176470588, "grad_norm": 6.228733539581299, "learning_rate": 3.9805e-06, "loss": 0.2279, "step": 540 }, { "epoch": 0.06411764705882353, "grad_norm": 6.658992290496826, "learning_rate": 3.978e-06, "loss": 0.2838, "step": 545 }, { "epoch": 0.06470588235294118, "grad_norm": 5.978189945220947, "learning_rate": 3.9754999999999995e-06, "loss": 0.2182, "step": 550 }, { "epoch": 0.06529411764705882, "grad_norm": 4.904825210571289, "learning_rate": 3.973e-06, "loss": 0.2415, "step": 555 }, { "epoch": 0.06588235294117648, "grad_norm": 5.675756931304932, "learning_rate": 3.9704999999999995e-06, "loss": 0.2917, "step": 560 }, { "epoch": 0.06647058823529411, "grad_norm": 6.152967929840088, "learning_rate": 3.968e-06, "loss": 0.2558, "step": 565 }, { "epoch": 0.06705882352941177, "grad_norm": 6.136238098144531, "learning_rate": 3.9654999999999996e-06, "loss": 0.2474, "step": 570 }, { "epoch": 0.06764705882352941, "grad_norm": 7.141924858093262, "learning_rate": 3.963e-06, "loss": 0.2509, "step": 575 }, { "epoch": 0.06823529411764706, "grad_norm": 5.643259525299072, "learning_rate": 3.9605e-06, "loss": 0.2489, "step": 580 }, { "epoch": 0.0688235294117647, "grad_norm": 5.501373767852783, "learning_rate": 3.958e-06, "loss": 0.2178, "step": 585 }, { "epoch": 0.06941176470588235, "grad_norm": 6.953498840332031, "learning_rate": 3.9555e-06, "loss": 0.2693, "step": 590 }, { "epoch": 0.07, "grad_norm": 6.3730573654174805, "learning_rate": 3.952999999999999e-06, "loss": 0.2481, "step": 595 }, { "epoch": 0.07058823529411765, "grad_norm": 4.494640350341797, "learning_rate": 3.9505e-06, "loss": 0.2374, "step": 600 }, { "epoch": 0.0711764705882353, "grad_norm": 4.978265762329102, "learning_rate": 3.948e-06, "loss": 0.2479, "step": 605 }, { "epoch": 0.07176470588235294, "grad_norm": 6.582556247711182, "learning_rate": 3.9455e-06, "loss": 0.2264, "step": 610 }, { "epoch": 0.07235294117647059, "grad_norm": 5.475729465484619, "learning_rate": 3.943e-06, "loss": 0.2177, "step": 615 }, { "epoch": 0.07294117647058823, "grad_norm": 5.280641555786133, "learning_rate": 3.9405e-06, "loss": 0.2079, "step": 620 }, { "epoch": 0.07352941176470588, "grad_norm": 5.51185941696167, "learning_rate": 3.938e-06, "loss": 0.2298, "step": 625 }, { "epoch": 0.07411764705882352, "grad_norm": 5.52700662612915, "learning_rate": 3.9355e-06, "loss": 0.2731, "step": 630 }, { "epoch": 0.07470588235294118, "grad_norm": 6.956870079040527, "learning_rate": 3.9329999999999994e-06, "loss": 0.8592, "step": 635 }, { "epoch": 0.07529411764705882, "grad_norm": 5.793115139007568, "learning_rate": 3.9305e-06, "loss": 0.645, "step": 640 }, { "epoch": 0.07588235294117647, "grad_norm": 6.2578253746032715, "learning_rate": 3.9279999999999995e-06, "loss": 0.489, "step": 645 }, { "epoch": 0.07647058823529412, "grad_norm": 6.280098915100098, "learning_rate": 3.9255e-06, "loss": 0.4687, "step": 650 }, { "epoch": 0.07705882352941176, "grad_norm": 4.2571563720703125, "learning_rate": 3.9229999999999995e-06, "loss": 0.3064, "step": 655 }, { "epoch": 0.07764705882352942, "grad_norm": 5.740575790405273, "learning_rate": 3.9205e-06, "loss": 0.3015, "step": 660 }, { "epoch": 0.07823529411764706, "grad_norm": 4.379879474639893, "learning_rate": 3.918e-06, "loss": 0.2966, "step": 665 }, { "epoch": 0.07882352941176471, "grad_norm": 3.0233917236328125, "learning_rate": 3.9155e-06, "loss": 0.1898, "step": 670 }, { "epoch": 0.07941176470588235, "grad_norm": 4.23185920715332, "learning_rate": 3.913e-06, "loss": 0.156, "step": 675 }, { "epoch": 0.08, "grad_norm": 3.7305707931518555, "learning_rate": 3.9105e-06, "loss": 0.204, "step": 680 }, { "epoch": 0.08058823529411764, "grad_norm": 4.188661098480225, "learning_rate": 3.908e-06, "loss": 0.1613, "step": 685 }, { "epoch": 0.0811764705882353, "grad_norm": 10.491097450256348, "learning_rate": 3.9055e-06, "loss": 0.1804, "step": 690 }, { "epoch": 0.08176470588235295, "grad_norm": 4.781033992767334, "learning_rate": 3.903e-06, "loss": 0.193, "step": 695 }, { "epoch": 0.08235294117647059, "grad_norm": 4.572100639343262, "learning_rate": 3.9005e-06, "loss": 0.1926, "step": 700 }, { "epoch": 0.08294117647058824, "grad_norm": 5.542261123657227, "learning_rate": 3.898e-06, "loss": 0.2188, "step": 705 }, { "epoch": 0.08352941176470588, "grad_norm": 5.298327922821045, "learning_rate": 3.8955e-06, "loss": 0.2304, "step": 710 }, { "epoch": 0.08411764705882353, "grad_norm": 5.5341949462890625, "learning_rate": 3.893e-06, "loss": 0.2249, "step": 715 }, { "epoch": 0.08470588235294117, "grad_norm": 5.525173187255859, "learning_rate": 3.8904999999999994e-06, "loss": 0.2282, "step": 720 }, { "epoch": 0.08529411764705883, "grad_norm": 6.741849422454834, "learning_rate": 3.888e-06, "loss": 0.2418, "step": 725 }, { "epoch": 0.08588235294117647, "grad_norm": 4.5786237716674805, "learning_rate": 3.8854999999999995e-06, "loss": 0.2525, "step": 730 }, { "epoch": 0.08647058823529412, "grad_norm": 5.180244445800781, "learning_rate": 3.883e-06, "loss": 0.261, "step": 735 }, { "epoch": 0.08705882352941176, "grad_norm": 4.789542198181152, "learning_rate": 3.8804999999999995e-06, "loss": 0.2262, "step": 740 }, { "epoch": 0.08764705882352941, "grad_norm": 5.503627300262451, "learning_rate": 3.878e-06, "loss": 0.2297, "step": 745 }, { "epoch": 0.08823529411764706, "grad_norm": 5.6325364112854, "learning_rate": 3.8754999999999996e-06, "loss": 0.3085, "step": 750 }, { "epoch": 0.0888235294117647, "grad_norm": 4.962948799133301, "learning_rate": 3.873e-06, "loss": 0.328, "step": 755 }, { "epoch": 0.08941176470588236, "grad_norm": 5.155531883239746, "learning_rate": 3.8705e-06, "loss": 0.3275, "step": 760 }, { "epoch": 0.09, "grad_norm": 4.880023002624512, "learning_rate": 3.868e-06, "loss": 0.393, "step": 765 }, { "epoch": 0.09058823529411765, "grad_norm": 5.44528341293335, "learning_rate": 3.8655e-06, "loss": 0.4392, "step": 770 }, { "epoch": 0.09117647058823529, "grad_norm": 4.727124214172363, "learning_rate": 3.863e-06, "loss": 0.4128, "step": 775 }, { "epoch": 0.09176470588235294, "grad_norm": 4.148149013519287, "learning_rate": 3.8605e-06, "loss": 0.3476, "step": 780 }, { "epoch": 0.09235294117647058, "grad_norm": 4.466519832611084, "learning_rate": 3.858e-06, "loss": 0.4156, "step": 785 }, { "epoch": 0.09294117647058824, "grad_norm": 5.839615345001221, "learning_rate": 3.8555e-06, "loss": 0.3783, "step": 790 }, { "epoch": 0.09352941176470589, "grad_norm": 4.525641918182373, "learning_rate": 3.853e-06, "loss": 0.4087, "step": 795 }, { "epoch": 0.09411764705882353, "grad_norm": 4.3257246017456055, "learning_rate": 3.8505e-06, "loss": 0.362, "step": 800 }, { "epoch": 0.09470588235294118, "grad_norm": 4.383883953094482, "learning_rate": 3.847999999999999e-06, "loss": 0.3493, "step": 805 }, { "epoch": 0.09529411764705882, "grad_norm": 4.919656753540039, "learning_rate": 3.8455e-06, "loss": 0.3623, "step": 810 }, { "epoch": 0.09588235294117647, "grad_norm": 5.0872979164123535, "learning_rate": 3.8429999999999995e-06, "loss": 0.352, "step": 815 }, { "epoch": 0.09647058823529411, "grad_norm": 4.851845741271973, "learning_rate": 3.8405e-06, "loss": 0.3378, "step": 820 }, { "epoch": 0.09705882352941177, "grad_norm": 5.158241271972656, "learning_rate": 3.8379999999999995e-06, "loss": 0.2956, "step": 825 }, { "epoch": 0.0976470588235294, "grad_norm": 5.6491265296936035, "learning_rate": 3.8355e-06, "loss": 0.3009, "step": 830 }, { "epoch": 0.09823529411764706, "grad_norm": 4.210770130157471, "learning_rate": 3.833e-06, "loss": 0.246, "step": 835 }, { "epoch": 0.0988235294117647, "grad_norm": 5.702970027923584, "learning_rate": 3.8305e-06, "loss": 0.2751, "step": 840 }, { "epoch": 0.09941176470588235, "grad_norm": 5.113010406494141, "learning_rate": 3.828e-06, "loss": 0.2816, "step": 845 }, { "epoch": 0.1, "grad_norm": 5.5813798904418945, "learning_rate": 3.8255e-06, "loss": 0.2342, "step": 850 }, { "epoch": 0.10058823529411764, "grad_norm": 5.855239391326904, "learning_rate": 3.823e-06, "loss": 0.2209, "step": 855 }, { "epoch": 0.1011764705882353, "grad_norm": 5.444701194763184, "learning_rate": 3.8205e-06, "loss": 0.232, "step": 860 }, { "epoch": 0.10176470588235294, "grad_norm": 6.598945617675781, "learning_rate": 3.818e-06, "loss": 0.2299, "step": 865 }, { "epoch": 0.10235294117647059, "grad_norm": 6.535606384277344, "learning_rate": 3.8155e-06, "loss": 0.2316, "step": 870 }, { "epoch": 0.10294117647058823, "grad_norm": 6.9219841957092285, "learning_rate": 3.813e-06, "loss": 0.2756, "step": 875 }, { "epoch": 0.10352941176470588, "grad_norm": 4.814761161804199, "learning_rate": 3.8105e-06, "loss": 0.2811, "step": 880 }, { "epoch": 0.10411764705882352, "grad_norm": 3.818138599395752, "learning_rate": 3.808e-06, "loss": 0.3494, "step": 885 }, { "epoch": 0.10470588235294118, "grad_norm": 5.272932052612305, "learning_rate": 3.8055e-06, "loss": 0.329, "step": 890 }, { "epoch": 0.10529411764705883, "grad_norm": 4.8542070388793945, "learning_rate": 3.803e-06, "loss": 0.3697, "step": 895 }, { "epoch": 0.10588235294117647, "grad_norm": 4.821626663208008, "learning_rate": 3.8005e-06, "loss": 0.3722, "step": 900 }, { "epoch": 0.10647058823529412, "grad_norm": 4.089189052581787, "learning_rate": 3.798e-06, "loss": 0.3313, "step": 905 }, { "epoch": 0.10705882352941176, "grad_norm": 5.140932083129883, "learning_rate": 3.7955e-06, "loss": 0.3759, "step": 910 }, { "epoch": 0.10764705882352942, "grad_norm": 3.9780690670013428, "learning_rate": 3.793e-06, "loss": 0.3643, "step": 915 }, { "epoch": 0.10823529411764705, "grad_norm": 5.412399768829346, "learning_rate": 3.7905e-06, "loss": 0.4023, "step": 920 }, { "epoch": 0.10882352941176471, "grad_norm": 4.042774677276611, "learning_rate": 3.7879999999999996e-06, "loss": 0.3708, "step": 925 }, { "epoch": 0.10941176470588235, "grad_norm": 5.141615867614746, "learning_rate": 3.7854999999999996e-06, "loss": 0.3875, "step": 930 }, { "epoch": 0.11, "grad_norm": 5.521014213562012, "learning_rate": 3.7829999999999996e-06, "loss": 0.3602, "step": 935 }, { "epoch": 0.11058823529411765, "grad_norm": 4.1281819343566895, "learning_rate": 3.7804999999999996e-06, "loss": 0.3546, "step": 940 }, { "epoch": 0.1111764705882353, "grad_norm": 6.497892379760742, "learning_rate": 3.7779999999999997e-06, "loss": 0.3858, "step": 945 }, { "epoch": 0.11176470588235295, "grad_norm": 7.118585586547852, "learning_rate": 3.7755e-06, "loss": 0.3073, "step": 950 }, { "epoch": 0.11235294117647059, "grad_norm": 4.879085540771484, "learning_rate": 3.773e-06, "loss": 0.2779, "step": 955 }, { "epoch": 0.11294117647058824, "grad_norm": 5.990501403808594, "learning_rate": 3.7705e-06, "loss": 0.2895, "step": 960 }, { "epoch": 0.11352941176470588, "grad_norm": 5.490088939666748, "learning_rate": 3.7679999999999998e-06, "loss": 0.2766, "step": 965 }, { "epoch": 0.11411764705882353, "grad_norm": 8.519075393676758, "learning_rate": 3.7654999999999998e-06, "loss": 0.3154, "step": 970 }, { "epoch": 0.11470588235294117, "grad_norm": 5.58041524887085, "learning_rate": 3.763e-06, "loss": 0.2216, "step": 975 }, { "epoch": 0.11529411764705882, "grad_norm": 6.6442365646362305, "learning_rate": 3.7605e-06, "loss": 0.2493, "step": 980 }, { "epoch": 0.11588235294117646, "grad_norm": 4.384298801422119, "learning_rate": 3.758e-06, "loss": 0.2418, "step": 985 }, { "epoch": 0.11647058823529412, "grad_norm": 5.432238578796387, "learning_rate": 3.7555e-06, "loss": 0.2506, "step": 990 }, { "epoch": 0.11705882352941177, "grad_norm": 5.37861442565918, "learning_rate": 3.753e-06, "loss": 0.2325, "step": 995 }, { "epoch": 0.11764705882352941, "grad_norm": 5.040102481842041, "learning_rate": 3.7505e-06, "loss": 0.2306, "step": 1000 }, { "epoch": 0.11764705882352941, "eval_loss": 0.20429430902004242, "eval_runtime": 216.9903, "eval_samples_per_second": 8.125, "eval_steps_per_second": 2.032, "eval_wer": 12.331596910364649, "step": 1000 }, { "epoch": 0.11823529411764706, "grad_norm": 5.372447490692139, "learning_rate": 3.748e-06, "loss": 0.2214, "step": 1005 }, { "epoch": 0.1188235294117647, "grad_norm": 5.384559154510498, "learning_rate": 3.7454999999999995e-06, "loss": 0.2206, "step": 1010 }, { "epoch": 0.11941176470588236, "grad_norm": 5.93951416015625, "learning_rate": 3.7429999999999996e-06, "loss": 0.2328, "step": 1015 }, { "epoch": 0.12, "grad_norm": 5.868914604187012, "learning_rate": 3.7404999999999996e-06, "loss": 0.24, "step": 1020 }, { "epoch": 0.12058823529411765, "grad_norm": 5.27166748046875, "learning_rate": 3.7379999999999996e-06, "loss": 0.2045, "step": 1025 }, { "epoch": 0.12117647058823529, "grad_norm": 4.123020648956299, "learning_rate": 3.7355e-06, "loss": 0.2236, "step": 1030 }, { "epoch": 0.12176470588235294, "grad_norm": 5.137660980224609, "learning_rate": 3.733e-06, "loss": 0.2418, "step": 1035 }, { "epoch": 0.1223529411764706, "grad_norm": 4.857790946960449, "learning_rate": 3.7305e-06, "loss": 0.2082, "step": 1040 }, { "epoch": 0.12294117647058823, "grad_norm": 4.801690578460693, "learning_rate": 3.728e-06, "loss": 0.2145, "step": 1045 }, { "epoch": 0.12352941176470589, "grad_norm": 5.351355075836182, "learning_rate": 3.7254999999999997e-06, "loss": 0.2191, "step": 1050 }, { "epoch": 0.12411764705882353, "grad_norm": 5.1854634284973145, "learning_rate": 3.7229999999999998e-06, "loss": 0.2256, "step": 1055 }, { "epoch": 0.12470588235294118, "grad_norm": 5.637546062469482, "learning_rate": 3.7204999999999998e-06, "loss": 0.2202, "step": 1060 }, { "epoch": 0.12529411764705883, "grad_norm": 5.312127590179443, "learning_rate": 3.718e-06, "loss": 0.2381, "step": 1065 }, { "epoch": 0.12588235294117647, "grad_norm": 5.012390613555908, "learning_rate": 3.7155e-06, "loss": 0.2156, "step": 1070 }, { "epoch": 0.1264705882352941, "grad_norm": 4.8346405029296875, "learning_rate": 3.713e-06, "loss": 0.2186, "step": 1075 }, { "epoch": 0.12705882352941175, "grad_norm": 5.988662242889404, "learning_rate": 3.7105e-06, "loss": 0.2496, "step": 1080 }, { "epoch": 0.12764705882352942, "grad_norm": 4.585602283477783, "learning_rate": 3.708e-06, "loss": 0.2139, "step": 1085 }, { "epoch": 0.12823529411764706, "grad_norm": 4.92340612411499, "learning_rate": 3.7054999999999995e-06, "loss": 0.2406, "step": 1090 }, { "epoch": 0.1288235294117647, "grad_norm": 4.198245048522949, "learning_rate": 3.7029999999999995e-06, "loss": 0.204, "step": 1095 }, { "epoch": 0.12941176470588237, "grad_norm": 4.813859462738037, "learning_rate": 3.7004999999999996e-06, "loss": 0.1986, "step": 1100 }, { "epoch": 0.13, "grad_norm": 6.106467247009277, "learning_rate": 3.6979999999999996e-06, "loss": 0.2574, "step": 1105 }, { "epoch": 0.13058823529411764, "grad_norm": 5.156613826751709, "learning_rate": 3.6955e-06, "loss": 0.1957, "step": 1110 }, { "epoch": 0.13117647058823528, "grad_norm": 5.812555313110352, "learning_rate": 3.693e-06, "loss": 0.2485, "step": 1115 }, { "epoch": 0.13176470588235295, "grad_norm": 4.636993885040283, "learning_rate": 3.6905e-06, "loss": 0.2136, "step": 1120 }, { "epoch": 0.1323529411764706, "grad_norm": 4.9718546867370605, "learning_rate": 3.688e-06, "loss": 0.2199, "step": 1125 }, { "epoch": 0.13294117647058823, "grad_norm": 6.394423484802246, "learning_rate": 3.6855e-06, "loss": 0.2597, "step": 1130 }, { "epoch": 0.13352941176470587, "grad_norm": 7.195622444152832, "learning_rate": 3.6829999999999997e-06, "loss": 0.244, "step": 1135 }, { "epoch": 0.13411764705882354, "grad_norm": 4.652588844299316, "learning_rate": 3.6804999999999997e-06, "loss": 0.1948, "step": 1140 }, { "epoch": 0.13470588235294118, "grad_norm": 6.326886177062988, "learning_rate": 3.6779999999999998e-06, "loss": 0.2367, "step": 1145 }, { "epoch": 0.13529411764705881, "grad_norm": 5.110639572143555, "learning_rate": 3.6755e-06, "loss": 0.2252, "step": 1150 }, { "epoch": 0.13588235294117648, "grad_norm": 7.251776695251465, "learning_rate": 3.673e-06, "loss": 0.2209, "step": 1155 }, { "epoch": 0.13647058823529412, "grad_norm": 5.178691864013672, "learning_rate": 3.6705e-06, "loss": 0.211, "step": 1160 }, { "epoch": 0.13705882352941176, "grad_norm": 7.980264663696289, "learning_rate": 3.668e-06, "loss": 0.1995, "step": 1165 }, { "epoch": 0.1376470588235294, "grad_norm": 5.534268856048584, "learning_rate": 3.6655e-06, "loss": 0.2224, "step": 1170 }, { "epoch": 0.13823529411764707, "grad_norm": 4.844344139099121, "learning_rate": 3.6629999999999995e-06, "loss": 0.2017, "step": 1175 }, { "epoch": 0.1388235294117647, "grad_norm": 5.262186527252197, "learning_rate": 3.6604999999999995e-06, "loss": 0.2341, "step": 1180 }, { "epoch": 0.13941176470588235, "grad_norm": 4.123838901519775, "learning_rate": 3.658e-06, "loss": 0.2337, "step": 1185 }, { "epoch": 0.14, "grad_norm": 5.441091060638428, "learning_rate": 3.6555e-06, "loss": 0.2563, "step": 1190 }, { "epoch": 0.14058823529411765, "grad_norm": 4.401068687438965, "learning_rate": 3.653e-06, "loss": 0.3152, "step": 1195 }, { "epoch": 0.1411764705882353, "grad_norm": 4.959137439727783, "learning_rate": 3.6505e-06, "loss": 0.3179, "step": 1200 }, { "epoch": 0.14176470588235293, "grad_norm": 5.01535701751709, "learning_rate": 3.648e-06, "loss": 0.325, "step": 1205 }, { "epoch": 0.1423529411764706, "grad_norm": 5.620340824127197, "learning_rate": 3.6455e-06, "loss": 0.3792, "step": 1210 }, { "epoch": 0.14294117647058824, "grad_norm": 3.6528735160827637, "learning_rate": 3.6429999999999997e-06, "loss": 0.3548, "step": 1215 }, { "epoch": 0.14352941176470588, "grad_norm": 4.610378742218018, "learning_rate": 3.6404999999999997e-06, "loss": 0.3941, "step": 1220 }, { "epoch": 0.14411764705882352, "grad_norm": 6.261916160583496, "learning_rate": 3.6379999999999997e-06, "loss": 0.4151, "step": 1225 }, { "epoch": 0.14470588235294118, "grad_norm": 4.071147441864014, "learning_rate": 3.6354999999999998e-06, "loss": 0.3938, "step": 1230 }, { "epoch": 0.14529411764705882, "grad_norm": 4.946579456329346, "learning_rate": 3.6329999999999998e-06, "loss": 0.3678, "step": 1235 }, { "epoch": 0.14588235294117646, "grad_norm": 4.924219131469727, "learning_rate": 3.6305e-06, "loss": 0.3493, "step": 1240 }, { "epoch": 0.14647058823529413, "grad_norm": 4.254496097564697, "learning_rate": 3.628e-06, "loss": 0.3273, "step": 1245 }, { "epoch": 0.14705882352941177, "grad_norm": 4.448436737060547, "learning_rate": 3.6255e-06, "loss": 0.3497, "step": 1250 }, { "epoch": 0.1476470588235294, "grad_norm": 4.587835788726807, "learning_rate": 3.623e-06, "loss": 0.4378, "step": 1255 }, { "epoch": 0.14823529411764705, "grad_norm": 6.557760715484619, "learning_rate": 3.6204999999999995e-06, "loss": 0.3738, "step": 1260 }, { "epoch": 0.14882352941176472, "grad_norm": 4.904112339019775, "learning_rate": 3.618e-06, "loss": 0.2865, "step": 1265 }, { "epoch": 0.14941176470588236, "grad_norm": 4.942874908447266, "learning_rate": 3.6155e-06, "loss": 0.2546, "step": 1270 }, { "epoch": 0.15, "grad_norm": 5.031833648681641, "learning_rate": 3.613e-06, "loss": 0.2702, "step": 1275 }, { "epoch": 0.15058823529411763, "grad_norm": 5.410985946655273, "learning_rate": 3.6105e-06, "loss": 0.2909, "step": 1280 }, { "epoch": 0.1511764705882353, "grad_norm": 5.556969165802002, "learning_rate": 3.608e-06, "loss": 0.2489, "step": 1285 }, { "epoch": 0.15176470588235294, "grad_norm": 5.32189416885376, "learning_rate": 3.6055e-06, "loss": 0.2347, "step": 1290 }, { "epoch": 0.15235294117647058, "grad_norm": 5.025787830352783, "learning_rate": 3.603e-06, "loss": 0.2333, "step": 1295 }, { "epoch": 0.15294117647058825, "grad_norm": 5.387501239776611, "learning_rate": 3.6004999999999997e-06, "loss": 0.247, "step": 1300 }, { "epoch": 0.1535294117647059, "grad_norm": 6.138737201690674, "learning_rate": 3.5979999999999997e-06, "loss": 0.2725, "step": 1305 }, { "epoch": 0.15411764705882353, "grad_norm": 5.1770100593566895, "learning_rate": 3.5954999999999997e-06, "loss": 0.1933, "step": 1310 }, { "epoch": 0.15470588235294117, "grad_norm": 4.4650468826293945, "learning_rate": 3.5929999999999997e-06, "loss": 0.2496, "step": 1315 }, { "epoch": 0.15529411764705883, "grad_norm": 4.732604026794434, "learning_rate": 3.5904999999999998e-06, "loss": 0.2832, "step": 1320 }, { "epoch": 0.15588235294117647, "grad_norm": 4.85684871673584, "learning_rate": 3.5879999999999998e-06, "loss": 0.3085, "step": 1325 }, { "epoch": 0.1564705882352941, "grad_norm": 4.6917924880981445, "learning_rate": 3.5855e-06, "loss": 0.3523, "step": 1330 }, { "epoch": 0.15705882352941178, "grad_norm": 4.3678388595581055, "learning_rate": 3.583e-06, "loss": 0.3027, "step": 1335 }, { "epoch": 0.15764705882352942, "grad_norm": 4.021628379821777, "learning_rate": 3.5804999999999994e-06, "loss": 0.3496, "step": 1340 }, { "epoch": 0.15823529411764706, "grad_norm": 3.8278491497039795, "learning_rate": 3.578e-06, "loss": 0.2881, "step": 1345 }, { "epoch": 0.1588235294117647, "grad_norm": 4.725404739379883, "learning_rate": 3.5755e-06, "loss": 0.3413, "step": 1350 }, { "epoch": 0.15941176470588236, "grad_norm": 4.69443941116333, "learning_rate": 3.573e-06, "loss": 0.3917, "step": 1355 }, { "epoch": 0.16, "grad_norm": 5.181183338165283, "learning_rate": 3.5705e-06, "loss": 0.3611, "step": 1360 }, { "epoch": 0.16058823529411764, "grad_norm": 4.6248860359191895, "learning_rate": 3.568e-06, "loss": 0.4132, "step": 1365 }, { "epoch": 0.16117647058823528, "grad_norm": 4.929358005523682, "learning_rate": 3.5655e-06, "loss": 0.4273, "step": 1370 }, { "epoch": 0.16176470588235295, "grad_norm": 4.449674129486084, "learning_rate": 3.563e-06, "loss": 0.3617, "step": 1375 }, { "epoch": 0.1623529411764706, "grad_norm": 4.592928886413574, "learning_rate": 3.5605e-06, "loss": 0.3117, "step": 1380 }, { "epoch": 0.16294117647058823, "grad_norm": 4.599587917327881, "learning_rate": 3.5579999999999996e-06, "loss": 0.3114, "step": 1385 }, { "epoch": 0.1635294117647059, "grad_norm": 4.835196495056152, "learning_rate": 3.5554999999999997e-06, "loss": 0.2709, "step": 1390 }, { "epoch": 0.16411764705882353, "grad_norm": 6.972946643829346, "learning_rate": 3.5529999999999997e-06, "loss": 0.2965, "step": 1395 }, { "epoch": 0.16470588235294117, "grad_norm": 4.811103820800781, "learning_rate": 3.5504999999999997e-06, "loss": 0.3204, "step": 1400 }, { "epoch": 0.1652941176470588, "grad_norm": 4.717560291290283, "learning_rate": 3.5479999999999997e-06, "loss": 0.2602, "step": 1405 }, { "epoch": 0.16588235294117648, "grad_norm": 5.278004169464111, "learning_rate": 3.5454999999999998e-06, "loss": 0.2354, "step": 1410 }, { "epoch": 0.16647058823529412, "grad_norm": 5.211602687835693, "learning_rate": 3.543e-06, "loss": 0.2861, "step": 1415 }, { "epoch": 0.16705882352941176, "grad_norm": 5.527140140533447, "learning_rate": 3.5405000000000002e-06, "loss": 0.2544, "step": 1420 }, { "epoch": 0.1676470588235294, "grad_norm": 6.314732551574707, "learning_rate": 3.538e-06, "loss": 0.2149, "step": 1425 }, { "epoch": 0.16823529411764707, "grad_norm": 4.920109748840332, "learning_rate": 3.5355e-06, "loss": 0.2202, "step": 1430 }, { "epoch": 0.1688235294117647, "grad_norm": 4.541561603546143, "learning_rate": 3.533e-06, "loss": 0.2122, "step": 1435 }, { "epoch": 0.16941176470588235, "grad_norm": 4.952088356018066, "learning_rate": 3.5305e-06, "loss": 0.2207, "step": 1440 }, { "epoch": 0.17, "grad_norm": 5.790253639221191, "learning_rate": 3.528e-06, "loss": 0.1972, "step": 1445 }, { "epoch": 0.17058823529411765, "grad_norm": 5.699951648712158, "learning_rate": 3.5255e-06, "loss": 0.2087, "step": 1450 }, { "epoch": 0.1711764705882353, "grad_norm": 4.806551933288574, "learning_rate": 3.523e-06, "loss": 0.217, "step": 1455 }, { "epoch": 0.17176470588235293, "grad_norm": 4.645950794219971, "learning_rate": 3.5205e-06, "loss": 0.2343, "step": 1460 }, { "epoch": 0.1723529411764706, "grad_norm": 6.063928127288818, "learning_rate": 3.5179999999999996e-06, "loss": 0.2266, "step": 1465 }, { "epoch": 0.17294117647058824, "grad_norm": 5.434706687927246, "learning_rate": 3.5154999999999996e-06, "loss": 0.2257, "step": 1470 }, { "epoch": 0.17352941176470588, "grad_norm": 6.158682823181152, "learning_rate": 3.5129999999999997e-06, "loss": 0.2552, "step": 1475 }, { "epoch": 0.17411764705882352, "grad_norm": 6.763813018798828, "learning_rate": 3.5104999999999997e-06, "loss": 0.2064, "step": 1480 }, { "epoch": 0.17470588235294118, "grad_norm": 7.0343170166015625, "learning_rate": 3.5079999999999997e-06, "loss": 0.2281, "step": 1485 }, { "epoch": 0.17529411764705882, "grad_norm": 6.138120174407959, "learning_rate": 3.5054999999999997e-06, "loss": 0.2166, "step": 1490 }, { "epoch": 0.17588235294117646, "grad_norm": 5.111180782318115, "learning_rate": 3.5029999999999997e-06, "loss": 0.1969, "step": 1495 }, { "epoch": 0.17647058823529413, "grad_norm": 4.685019493103027, "learning_rate": 3.5005e-06, "loss": 0.218, "step": 1500 }, { "epoch": 0.17647058823529413, "eval_loss": 0.2091599404811859, "eval_runtime": 216.2645, "eval_samples_per_second": 8.152, "eval_steps_per_second": 2.039, "eval_wer": 12.690856834920064, "step": 1500 }, { "epoch": 0.17705882352941177, "grad_norm": 4.987586498260498, "learning_rate": 3.4980000000000002e-06, "loss": 0.2162, "step": 1505 }, { "epoch": 0.1776470588235294, "grad_norm": 4.831029891967773, "learning_rate": 3.4955e-06, "loss": 0.1938, "step": 1510 }, { "epoch": 0.17823529411764705, "grad_norm": 4.7028913497924805, "learning_rate": 3.493e-06, "loss": 0.2346, "step": 1515 }, { "epoch": 0.17882352941176471, "grad_norm": 5.426929473876953, "learning_rate": 3.4905e-06, "loss": 0.2453, "step": 1520 }, { "epoch": 0.17941176470588235, "grad_norm": 5.629075050354004, "learning_rate": 3.488e-06, "loss": 0.2264, "step": 1525 }, { "epoch": 0.18, "grad_norm": 5.690018177032471, "learning_rate": 3.4855e-06, "loss": 0.2316, "step": 1530 }, { "epoch": 0.18058823529411766, "grad_norm": 5.642812728881836, "learning_rate": 3.483e-06, "loss": 0.2334, "step": 1535 }, { "epoch": 0.1811764705882353, "grad_norm": 5.45850133895874, "learning_rate": 3.4805e-06, "loss": 0.237, "step": 1540 }, { "epoch": 0.18176470588235294, "grad_norm": 5.326263427734375, "learning_rate": 3.478e-06, "loss": 0.1982, "step": 1545 }, { "epoch": 0.18235294117647058, "grad_norm": 5.388126373291016, "learning_rate": 3.4754999999999996e-06, "loss": 0.1749, "step": 1550 }, { "epoch": 0.18294117647058825, "grad_norm": 5.697015285491943, "learning_rate": 3.4729999999999996e-06, "loss": 0.2308, "step": 1555 }, { "epoch": 0.18352941176470589, "grad_norm": 4.502722263336182, "learning_rate": 3.4704999999999996e-06, "loss": 0.2049, "step": 1560 }, { "epoch": 0.18411764705882352, "grad_norm": 5.733637809753418, "learning_rate": 3.4679999999999997e-06, "loss": 0.2474, "step": 1565 }, { "epoch": 0.18470588235294116, "grad_norm": 4.061824798583984, "learning_rate": 3.4654999999999997e-06, "loss": 0.2287, "step": 1570 }, { "epoch": 0.18529411764705883, "grad_norm": 4.982180595397949, "learning_rate": 3.463e-06, "loss": 0.2731, "step": 1575 }, { "epoch": 0.18588235294117647, "grad_norm": 5.28707218170166, "learning_rate": 3.4605e-06, "loss": 0.3603, "step": 1580 }, { "epoch": 0.1864705882352941, "grad_norm": 4.933184623718262, "learning_rate": 3.458e-06, "loss": 0.3903, "step": 1585 }, { "epoch": 0.18705882352941178, "grad_norm": 4.234166622161865, "learning_rate": 3.4554999999999998e-06, "loss": 0.3541, "step": 1590 }, { "epoch": 0.18764705882352942, "grad_norm": 4.566234588623047, "learning_rate": 3.453e-06, "loss": 0.3329, "step": 1595 }, { "epoch": 0.18823529411764706, "grad_norm": 4.58034610748291, "learning_rate": 3.4505e-06, "loss": 0.3277, "step": 1600 }, { "epoch": 0.1888235294117647, "grad_norm": 3.9469552040100098, "learning_rate": 3.448e-06, "loss": 0.3597, "step": 1605 }, { "epoch": 0.18941176470588236, "grad_norm": 3.964589834213257, "learning_rate": 3.4455e-06, "loss": 0.3319, "step": 1610 }, { "epoch": 0.19, "grad_norm": 4.471989631652832, "learning_rate": 3.443e-06, "loss": 0.4119, "step": 1615 }, { "epoch": 0.19058823529411764, "grad_norm": 4.861955642700195, "learning_rate": 3.4405e-06, "loss": 0.3404, "step": 1620 }, { "epoch": 0.19117647058823528, "grad_norm": 3.8914120197296143, "learning_rate": 3.438e-06, "loss": 0.3446, "step": 1625 }, { "epoch": 0.19176470588235295, "grad_norm": 5.617094993591309, "learning_rate": 3.4355e-06, "loss": 0.4322, "step": 1630 }, { "epoch": 0.1923529411764706, "grad_norm": 4.572572231292725, "learning_rate": 3.4329999999999996e-06, "loss": 0.3311, "step": 1635 }, { "epoch": 0.19294117647058823, "grad_norm": 5.199912071228027, "learning_rate": 3.4304999999999996e-06, "loss": 0.3157, "step": 1640 }, { "epoch": 0.1935294117647059, "grad_norm": 4.877800941467285, "learning_rate": 3.4279999999999996e-06, "loss": 0.2627, "step": 1645 }, { "epoch": 0.19411764705882353, "grad_norm": 5.21949577331543, "learning_rate": 3.4254999999999996e-06, "loss": 0.2716, "step": 1650 }, { "epoch": 0.19470588235294117, "grad_norm": 6.272247791290283, "learning_rate": 3.423e-06, "loss": 0.2825, "step": 1655 }, { "epoch": 0.1952941176470588, "grad_norm": 4.400435447692871, "learning_rate": 3.4205e-06, "loss": 0.1845, "step": 1660 }, { "epoch": 0.19588235294117648, "grad_norm": 5.97955846786499, "learning_rate": 3.418e-06, "loss": 0.2942, "step": 1665 }, { "epoch": 0.19647058823529412, "grad_norm": 4.720533847808838, "learning_rate": 3.4155e-06, "loss": 0.2353, "step": 1670 }, { "epoch": 0.19705882352941176, "grad_norm": 4.945983409881592, "learning_rate": 3.4129999999999998e-06, "loss": 0.2436, "step": 1675 }, { "epoch": 0.1976470588235294, "grad_norm": 5.866307735443115, "learning_rate": 3.4104999999999998e-06, "loss": 0.2137, "step": 1680 }, { "epoch": 0.19823529411764707, "grad_norm": 4.8247199058532715, "learning_rate": 3.408e-06, "loss": 0.2292, "step": 1685 }, { "epoch": 0.1988235294117647, "grad_norm": 4.687603950500488, "learning_rate": 3.4055e-06, "loss": 0.2065, "step": 1690 }, { "epoch": 0.19941176470588234, "grad_norm": 5.3624587059021, "learning_rate": 3.403e-06, "loss": 0.2421, "step": 1695 }, { "epoch": 0.2, "grad_norm": 5.11728048324585, "learning_rate": 3.4005e-06, "loss": 0.2256, "step": 1700 }, { "epoch": 0.20058823529411765, "grad_norm": 5.00189733505249, "learning_rate": 3.398e-06, "loss": 0.2481, "step": 1705 }, { "epoch": 0.2011764705882353, "grad_norm": 4.499592304229736, "learning_rate": 3.3955e-06, "loss": 0.2025, "step": 1710 }, { "epoch": 0.20176470588235293, "grad_norm": 5.460886478424072, "learning_rate": 3.3929999999999995e-06, "loss": 0.2021, "step": 1715 }, { "epoch": 0.2023529411764706, "grad_norm": 5.0783257484436035, "learning_rate": 3.3904999999999996e-06, "loss": 0.2162, "step": 1720 }, { "epoch": 0.20294117647058824, "grad_norm": 4.745920181274414, "learning_rate": 3.3879999999999996e-06, "loss": 0.1977, "step": 1725 }, { "epoch": 0.20352941176470588, "grad_norm": 4.3456549644470215, "learning_rate": 3.3854999999999996e-06, "loss": 0.196, "step": 1730 }, { "epoch": 0.20411764705882354, "grad_norm": 5.27536153793335, "learning_rate": 3.383e-06, "loss": 0.1699, "step": 1735 }, { "epoch": 0.20470588235294118, "grad_norm": 6.126989364624023, "learning_rate": 3.3805e-06, "loss": 0.1983, "step": 1740 }, { "epoch": 0.20529411764705882, "grad_norm": 4.719369411468506, "learning_rate": 3.378e-06, "loss": 0.2039, "step": 1745 }, { "epoch": 0.20588235294117646, "grad_norm": 5.472100734710693, "learning_rate": 3.3755e-06, "loss": 0.2113, "step": 1750 }, { "epoch": 0.20647058823529413, "grad_norm": 5.173931121826172, "learning_rate": 3.373e-06, "loss": 0.2144, "step": 1755 }, { "epoch": 0.20705882352941177, "grad_norm": 5.292851448059082, "learning_rate": 3.3704999999999997e-06, "loss": 0.2004, "step": 1760 }, { "epoch": 0.2076470588235294, "grad_norm": 4.298732280731201, "learning_rate": 3.3679999999999998e-06, "loss": 0.2235, "step": 1765 }, { "epoch": 0.20823529411764705, "grad_norm": 4.527466297149658, "learning_rate": 3.3655e-06, "loss": 0.2607, "step": 1770 }, { "epoch": 0.2088235294117647, "grad_norm": 4.524056911468506, "learning_rate": 3.363e-06, "loss": 0.2132, "step": 1775 }, { "epoch": 0.20941176470588235, "grad_norm": 5.082316875457764, "learning_rate": 3.3605e-06, "loss": 0.2348, "step": 1780 }, { "epoch": 0.21, "grad_norm": 5.756541728973389, "learning_rate": 3.358e-06, "loss": 0.2249, "step": 1785 }, { "epoch": 0.21058823529411766, "grad_norm": 4.934853553771973, "learning_rate": 3.3555e-06, "loss": 0.2307, "step": 1790 }, { "epoch": 0.2111764705882353, "grad_norm": 4.983335018157959, "learning_rate": 3.353e-06, "loss": 0.2052, "step": 1795 }, { "epoch": 0.21176470588235294, "grad_norm": 4.598167896270752, "learning_rate": 3.3504999999999995e-06, "loss": 0.2235, "step": 1800 }, { "epoch": 0.21235294117647058, "grad_norm": 4.2112836837768555, "learning_rate": 3.3479999999999995e-06, "loss": 0.1986, "step": 1805 }, { "epoch": 0.21294117647058824, "grad_norm": 4.084402084350586, "learning_rate": 3.3454999999999996e-06, "loss": 0.1762, "step": 1810 }, { "epoch": 0.21352941176470588, "grad_norm": 4.943021297454834, "learning_rate": 3.343e-06, "loss": 0.177, "step": 1815 }, { "epoch": 0.21411764705882352, "grad_norm": 4.407837867736816, "learning_rate": 3.3405e-06, "loss": 0.1954, "step": 1820 }, { "epoch": 0.21470588235294116, "grad_norm": 4.4471211433410645, "learning_rate": 3.338e-06, "loss": 0.2628, "step": 1825 }, { "epoch": 0.21529411764705883, "grad_norm": 4.133277416229248, "learning_rate": 3.3355e-06, "loss": 0.236, "step": 1830 }, { "epoch": 0.21588235294117647, "grad_norm": 3.91284441947937, "learning_rate": 3.333e-06, "loss": 0.2705, "step": 1835 }, { "epoch": 0.2164705882352941, "grad_norm": 3.8954124450683594, "learning_rate": 3.3304999999999997e-06, "loss": 0.3378, "step": 1840 }, { "epoch": 0.21705882352941178, "grad_norm": 3.695194959640503, "learning_rate": 3.3279999999999997e-06, "loss": 0.2831, "step": 1845 }, { "epoch": 0.21764705882352942, "grad_norm": 4.869847774505615, "learning_rate": 3.3254999999999998e-06, "loss": 0.3712, "step": 1850 }, { "epoch": 0.21823529411764706, "grad_norm": 3.9035191535949707, "learning_rate": 3.3229999999999998e-06, "loss": 0.3103, "step": 1855 }, { "epoch": 0.2188235294117647, "grad_norm": 4.778807640075684, "learning_rate": 3.3205e-06, "loss": 0.4091, "step": 1860 }, { "epoch": 0.21941176470588236, "grad_norm": 4.748985767364502, "learning_rate": 3.318e-06, "loss": 0.3645, "step": 1865 }, { "epoch": 0.22, "grad_norm": 4.992386817932129, "learning_rate": 3.3155e-06, "loss": 0.3167, "step": 1870 }, { "epoch": 0.22058823529411764, "grad_norm": 4.4621782302856445, "learning_rate": 3.313e-06, "loss": 0.3713, "step": 1875 }, { "epoch": 0.2211764705882353, "grad_norm": 4.1011128425598145, "learning_rate": 3.3105e-06, "loss": 0.3302, "step": 1880 }, { "epoch": 0.22176470588235295, "grad_norm": 4.1284308433532715, "learning_rate": 3.3079999999999995e-06, "loss": 0.3472, "step": 1885 }, { "epoch": 0.2223529411764706, "grad_norm": 4.235365390777588, "learning_rate": 3.3054999999999995e-06, "loss": 0.3381, "step": 1890 }, { "epoch": 0.22294117647058823, "grad_norm": 4.047764778137207, "learning_rate": 3.303e-06, "loss": 0.2814, "step": 1895 }, { "epoch": 0.2235294117647059, "grad_norm": 4.3635172843933105, "learning_rate": 3.3005e-06, "loss": 0.2778, "step": 1900 }, { "epoch": 0.22411764705882353, "grad_norm": 4.416534423828125, "learning_rate": 3.298e-06, "loss": 0.2932, "step": 1905 }, { "epoch": 0.22470588235294117, "grad_norm": 3.745755672454834, "learning_rate": 3.2955e-06, "loss": 0.2268, "step": 1910 }, { "epoch": 0.2252941176470588, "grad_norm": 4.2595601081848145, "learning_rate": 3.293e-06, "loss": 0.2046, "step": 1915 }, { "epoch": 0.22588235294117648, "grad_norm": 4.304042816162109, "learning_rate": 3.2905e-06, "loss": 0.23, "step": 1920 }, { "epoch": 0.22647058823529412, "grad_norm": 4.172942161560059, "learning_rate": 3.2879999999999997e-06, "loss": 0.2721, "step": 1925 }, { "epoch": 0.22705882352941176, "grad_norm": 4.37096643447876, "learning_rate": 3.2854999999999997e-06, "loss": 0.2078, "step": 1930 }, { "epoch": 0.22764705882352942, "grad_norm": 3.508002519607544, "learning_rate": 3.2829999999999997e-06, "loss": 0.2475, "step": 1935 }, { "epoch": 0.22823529411764706, "grad_norm": 3.484143018722534, "learning_rate": 3.2804999999999998e-06, "loss": 0.1905, "step": 1940 }, { "epoch": 0.2288235294117647, "grad_norm": 4.076836585998535, "learning_rate": 3.2779999999999998e-06, "loss": 0.2469, "step": 1945 }, { "epoch": 0.22941176470588234, "grad_norm": 4.40281867980957, "learning_rate": 3.2755e-06, "loss": 0.2474, "step": 1950 }, { "epoch": 0.23, "grad_norm": 3.8083369731903076, "learning_rate": 3.273e-06, "loss": 0.1711, "step": 1955 }, { "epoch": 0.23058823529411765, "grad_norm": 3.9586997032165527, "learning_rate": 3.2705e-06, "loss": 0.1933, "step": 1960 }, { "epoch": 0.2311764705882353, "grad_norm": 5.057861328125, "learning_rate": 3.2679999999999995e-06, "loss": 0.2367, "step": 1965 }, { "epoch": 0.23176470588235293, "grad_norm": 4.677099704742432, "learning_rate": 3.2655e-06, "loss": 0.1981, "step": 1970 }, { "epoch": 0.2323529411764706, "grad_norm": 4.976651191711426, "learning_rate": 3.263e-06, "loss": 0.2081, "step": 1975 }, { "epoch": 0.23294117647058823, "grad_norm": 4.262833595275879, "learning_rate": 3.2605e-06, "loss": 0.2288, "step": 1980 }, { "epoch": 0.23352941176470587, "grad_norm": 5.324251174926758, "learning_rate": 3.258e-06, "loss": 0.2004, "step": 1985 }, { "epoch": 0.23411764705882354, "grad_norm": 4.684001445770264, "learning_rate": 3.2555e-06, "loss": 0.2184, "step": 1990 }, { "epoch": 0.23470588235294118, "grad_norm": 5.974236965179443, "learning_rate": 3.253e-06, "loss": 0.2475, "step": 1995 }, { "epoch": 0.23529411764705882, "grad_norm": 6.527338981628418, "learning_rate": 3.2505e-06, "loss": 0.2178, "step": 2000 }, { "epoch": 0.23529411764705882, "eval_loss": 0.19680312275886536, "eval_runtime": 216.379, "eval_samples_per_second": 8.148, "eval_steps_per_second": 2.038, "eval_wer": 12.259744925453566, "step": 2000 }, { "epoch": 0.23588235294117646, "grad_norm": 5.7884745597839355, "learning_rate": 3.248e-06, "loss": 0.2022, "step": 2005 }, { "epoch": 0.23647058823529413, "grad_norm": 6.500155448913574, "learning_rate": 3.2454999999999997e-06, "loss": 0.1876, "step": 2010 }, { "epoch": 0.23705882352941177, "grad_norm": 5.068775653839111, "learning_rate": 3.2429999999999997e-06, "loss": 0.2378, "step": 2015 }, { "epoch": 0.2376470588235294, "grad_norm": 4.796257972717285, "learning_rate": 3.2404999999999997e-06, "loss": 0.2217, "step": 2020 }, { "epoch": 0.23823529411764705, "grad_norm": 4.534791946411133, "learning_rate": 3.2379999999999997e-06, "loss": 0.2125, "step": 2025 }, { "epoch": 0.2388235294117647, "grad_norm": 5.037903308868408, "learning_rate": 3.2354999999999998e-06, "loss": 0.2298, "step": 2030 }, { "epoch": 0.23941176470588235, "grad_norm": 6.077613353729248, "learning_rate": 3.233e-06, "loss": 0.2237, "step": 2035 }, { "epoch": 0.24, "grad_norm": 4.428590297698975, "learning_rate": 3.2305e-06, "loss": 0.2192, "step": 2040 }, { "epoch": 0.24058823529411766, "grad_norm": 4.670122146606445, "learning_rate": 3.2280000000000003e-06, "loss": 0.2212, "step": 2045 }, { "epoch": 0.2411764705882353, "grad_norm": 4.61508321762085, "learning_rate": 3.2255e-06, "loss": 0.2136, "step": 2050 }, { "epoch": 0.24176470588235294, "grad_norm": 4.95544958114624, "learning_rate": 3.223e-06, "loss": 0.2132, "step": 2055 }, { "epoch": 0.24235294117647058, "grad_norm": 5.424963474273682, "learning_rate": 3.2205e-06, "loss": 0.212, "step": 2060 }, { "epoch": 0.24294117647058824, "grad_norm": 4.786269187927246, "learning_rate": 3.218e-06, "loss": 0.1888, "step": 2065 }, { "epoch": 0.24352941176470588, "grad_norm": 4.085057258605957, "learning_rate": 3.2155e-06, "loss": 0.1691, "step": 2070 }, { "epoch": 0.24411764705882352, "grad_norm": 5.780310153961182, "learning_rate": 3.213e-06, "loss": 0.211, "step": 2075 }, { "epoch": 0.2447058823529412, "grad_norm": 6.000965118408203, "learning_rate": 3.2105e-06, "loss": 0.1896, "step": 2080 }, { "epoch": 0.24529411764705883, "grad_norm": 4.848095417022705, "learning_rate": 3.208e-06, "loss": 0.1977, "step": 2085 }, { "epoch": 0.24588235294117647, "grad_norm": 5.483325958251953, "learning_rate": 3.2054999999999996e-06, "loss": 0.2279, "step": 2090 }, { "epoch": 0.2464705882352941, "grad_norm": 5.576889991760254, "learning_rate": 3.2029999999999997e-06, "loss": 0.2159, "step": 2095 }, { "epoch": 0.24705882352941178, "grad_norm": 6.264328479766846, "learning_rate": 3.2004999999999997e-06, "loss": 0.1985, "step": 2100 }, { "epoch": 0.24764705882352941, "grad_norm": 6.178191184997559, "learning_rate": 3.1979999999999997e-06, "loss": 0.195, "step": 2105 }, { "epoch": 0.24823529411764705, "grad_norm": 4.896887302398682, "learning_rate": 3.1954999999999997e-06, "loss": 0.1827, "step": 2110 }, { "epoch": 0.2488235294117647, "grad_norm": 4.373982906341553, "learning_rate": 3.1929999999999998e-06, "loss": 0.1928, "step": 2115 }, { "epoch": 0.24941176470588236, "grad_norm": 5.759405136108398, "learning_rate": 3.1904999999999998e-06, "loss": 0.2379, "step": 2120 }, { "epoch": 0.25, "grad_norm": 5.012351989746094, "learning_rate": 3.1880000000000002e-06, "loss": 0.2022, "step": 2125 }, { "epoch": 0.25058823529411767, "grad_norm": 5.90570592880249, "learning_rate": 3.1855000000000002e-06, "loss": 0.1872, "step": 2130 }, { "epoch": 0.2511764705882353, "grad_norm": 5.164021015167236, "learning_rate": 3.183e-06, "loss": 0.214, "step": 2135 }, { "epoch": 0.25176470588235295, "grad_norm": 4.5676188468933105, "learning_rate": 3.1805e-06, "loss": 0.2105, "step": 2140 }, { "epoch": 0.2523529411764706, "grad_norm": 4.729475498199463, "learning_rate": 3.178e-06, "loss": 0.2098, "step": 2145 }, { "epoch": 0.2529411764705882, "grad_norm": 5.973753929138184, "learning_rate": 3.1755e-06, "loss": 0.2198, "step": 2150 }, { "epoch": 0.2535294117647059, "grad_norm": 4.154568672180176, "learning_rate": 3.173e-06, "loss": 0.2199, "step": 2155 }, { "epoch": 0.2541176470588235, "grad_norm": 4.8079142570495605, "learning_rate": 3.1705e-06, "loss": 0.2021, "step": 2160 }, { "epoch": 0.25470588235294117, "grad_norm": 4.679142475128174, "learning_rate": 3.168e-06, "loss": 0.204, "step": 2165 }, { "epoch": 0.25529411764705884, "grad_norm": 4.764895439147949, "learning_rate": 3.1655e-06, "loss": 0.2227, "step": 2170 }, { "epoch": 0.25588235294117645, "grad_norm": 5.741997718811035, "learning_rate": 3.1629999999999996e-06, "loss": 0.2326, "step": 2175 }, { "epoch": 0.2564705882352941, "grad_norm": 4.559428691864014, "learning_rate": 3.1604999999999996e-06, "loss": 0.1813, "step": 2180 }, { "epoch": 0.2570588235294118, "grad_norm": 5.247576713562012, "learning_rate": 3.1579999999999997e-06, "loss": 0.2199, "step": 2185 }, { "epoch": 0.2576470588235294, "grad_norm": 5.960746765136719, "learning_rate": 3.1554999999999997e-06, "loss": 0.2179, "step": 2190 }, { "epoch": 0.25823529411764706, "grad_norm": 6.225917816162109, "learning_rate": 3.1529999999999997e-06, "loss": 0.2424, "step": 2195 }, { "epoch": 0.25882352941176473, "grad_norm": 4.547580718994141, "learning_rate": 3.1504999999999997e-06, "loss": 0.2214, "step": 2200 }, { "epoch": 0.25941176470588234, "grad_norm": 5.808811664581299, "learning_rate": 3.148e-06, "loss": 0.206, "step": 2205 }, { "epoch": 0.26, "grad_norm": 4.810440540313721, "learning_rate": 3.1455e-06, "loss": 0.196, "step": 2210 }, { "epoch": 0.2605882352941176, "grad_norm": 5.063229560852051, "learning_rate": 3.143e-06, "loss": 0.2256, "step": 2215 }, { "epoch": 0.2611764705882353, "grad_norm": 4.889967918395996, "learning_rate": 3.1405e-06, "loss": 0.2122, "step": 2220 }, { "epoch": 0.26176470588235295, "grad_norm": 5.633798599243164, "learning_rate": 3.138e-06, "loss": 0.2022, "step": 2225 }, { "epoch": 0.26235294117647057, "grad_norm": 5.463539123535156, "learning_rate": 3.1355e-06, "loss": 0.2259, "step": 2230 }, { "epoch": 0.26294117647058823, "grad_norm": 3.872884750366211, "learning_rate": 3.133e-06, "loss": 0.2057, "step": 2235 }, { "epoch": 0.2635294117647059, "grad_norm": 4.860549449920654, "learning_rate": 3.1305e-06, "loss": 0.2118, "step": 2240 }, { "epoch": 0.2641176470588235, "grad_norm": 5.2378153800964355, "learning_rate": 3.128e-06, "loss": 0.2127, "step": 2245 }, { "epoch": 0.2647058823529412, "grad_norm": 5.856103420257568, "learning_rate": 3.1255e-06, "loss": 0.2304, "step": 2250 }, { "epoch": 0.26529411764705885, "grad_norm": 3.8151211738586426, "learning_rate": 3.123e-06, "loss": 0.2206, "step": 2255 }, { "epoch": 0.26588235294117646, "grad_norm": 4.382868766784668, "learning_rate": 3.1204999999999996e-06, "loss": 0.1968, "step": 2260 }, { "epoch": 0.2664705882352941, "grad_norm": 4.647398948669434, "learning_rate": 3.1179999999999996e-06, "loss": 0.2329, "step": 2265 }, { "epoch": 0.26705882352941174, "grad_norm": 4.926138877868652, "learning_rate": 3.1154999999999996e-06, "loss": 0.2361, "step": 2270 }, { "epoch": 0.2676470588235294, "grad_norm": 4.265030860900879, "learning_rate": 3.1129999999999997e-06, "loss": 0.201, "step": 2275 }, { "epoch": 0.26823529411764707, "grad_norm": 3.090471029281616, "learning_rate": 3.1104999999999997e-06, "loss": 0.2181, "step": 2280 }, { "epoch": 0.2688235294117647, "grad_norm": 4.185150623321533, "learning_rate": 3.108e-06, "loss": 0.2042, "step": 2285 }, { "epoch": 0.26941176470588235, "grad_norm": 3.9721622467041016, "learning_rate": 3.1055e-06, "loss": 0.2341, "step": 2290 }, { "epoch": 0.27, "grad_norm": 3.8742549419403076, "learning_rate": 3.103e-06, "loss": 0.1716, "step": 2295 }, { "epoch": 0.27058823529411763, "grad_norm": 3.6804440021514893, "learning_rate": 3.1005e-06, "loss": 0.2127, "step": 2300 }, { "epoch": 0.2711764705882353, "grad_norm": 3.6017045974731445, "learning_rate": 3.098e-06, "loss": 0.2131, "step": 2305 }, { "epoch": 0.27176470588235296, "grad_norm": 4.987339973449707, "learning_rate": 3.0955e-06, "loss": 0.2243, "step": 2310 }, { "epoch": 0.2723529411764706, "grad_norm": 3.7475435733795166, "learning_rate": 3.093e-06, "loss": 0.3487, "step": 2315 }, { "epoch": 0.27294117647058824, "grad_norm": 4.730339527130127, "learning_rate": 3.0905e-06, "loss": 0.3012, "step": 2320 }, { "epoch": 0.2735294117647059, "grad_norm": 3.88236403465271, "learning_rate": 3.088e-06, "loss": 0.2647, "step": 2325 }, { "epoch": 0.2741176470588235, "grad_norm": 5.1080756187438965, "learning_rate": 3.0855e-06, "loss": 0.3404, "step": 2330 }, { "epoch": 0.2747058823529412, "grad_norm": 5.076879501342773, "learning_rate": 3.083e-06, "loss": 0.2942, "step": 2335 }, { "epoch": 0.2752941176470588, "grad_norm": 3.9446821212768555, "learning_rate": 3.0804999999999996e-06, "loss": 0.2747, "step": 2340 }, { "epoch": 0.27588235294117647, "grad_norm": 6.164114952087402, "learning_rate": 3.0779999999999996e-06, "loss": 0.2812, "step": 2345 }, { "epoch": 0.27647058823529413, "grad_norm": 6.394044399261475, "learning_rate": 3.0754999999999996e-06, "loss": 0.2593, "step": 2350 }, { "epoch": 0.27705882352941175, "grad_norm": 6.166201114654541, "learning_rate": 3.0729999999999996e-06, "loss": 0.2563, "step": 2355 }, { "epoch": 0.2776470588235294, "grad_norm": 4.848891735076904, "learning_rate": 3.0705e-06, "loss": 0.1996, "step": 2360 }, { "epoch": 0.2782352941176471, "grad_norm": 5.803962707519531, "learning_rate": 3.068e-06, "loss": 0.1378, "step": 2365 }, { "epoch": 0.2788235294117647, "grad_norm": 4.9175238609313965, "learning_rate": 3.0655e-06, "loss": 0.1776, "step": 2370 }, { "epoch": 0.27941176470588236, "grad_norm": 5.20488166809082, "learning_rate": 3.063e-06, "loss": 0.1995, "step": 2375 }, { "epoch": 0.28, "grad_norm": 7.464907646179199, "learning_rate": 3.0605e-06, "loss": 0.207, "step": 2380 }, { "epoch": 0.28058823529411764, "grad_norm": 7.0938286781311035, "learning_rate": 3.0579999999999998e-06, "loss": 0.1798, "step": 2385 }, { "epoch": 0.2811764705882353, "grad_norm": 7.009293079376221, "learning_rate": 3.0555e-06, "loss": 0.1559, "step": 2390 }, { "epoch": 0.2817647058823529, "grad_norm": 7.157477855682373, "learning_rate": 3.053e-06, "loss": 0.1915, "step": 2395 }, { "epoch": 0.2823529411764706, "grad_norm": 5.487789630889893, "learning_rate": 3.0505e-06, "loss": 0.1756, "step": 2400 }, { "epoch": 0.28294117647058825, "grad_norm": 5.231931209564209, "learning_rate": 3.048e-06, "loss": 0.1879, "step": 2405 }, { "epoch": 0.28352941176470586, "grad_norm": 4.529813766479492, "learning_rate": 3.0455e-06, "loss": 0.1986, "step": 2410 }, { "epoch": 0.28411764705882353, "grad_norm": 5.955358982086182, "learning_rate": 3.043e-06, "loss": 0.2028, "step": 2415 }, { "epoch": 0.2847058823529412, "grad_norm": 7.148882865905762, "learning_rate": 3.0405e-06, "loss": 0.2297, "step": 2420 }, { "epoch": 0.2852941176470588, "grad_norm": 4.61805534362793, "learning_rate": 3.0379999999999995e-06, "loss": 0.2024, "step": 2425 }, { "epoch": 0.2858823529411765, "grad_norm": 5.262239456176758, "learning_rate": 3.0354999999999996e-06, "loss": 0.1794, "step": 2430 }, { "epoch": 0.28647058823529414, "grad_norm": 5.10941743850708, "learning_rate": 3.0329999999999996e-06, "loss": 0.2224, "step": 2435 }, { "epoch": 0.28705882352941176, "grad_norm": 5.245163917541504, "learning_rate": 3.0305e-06, "loss": 0.2207, "step": 2440 }, { "epoch": 0.2876470588235294, "grad_norm": 4.112963676452637, "learning_rate": 3.028e-06, "loss": 0.1873, "step": 2445 }, { "epoch": 0.28823529411764703, "grad_norm": 5.325206279754639, "learning_rate": 3.0255e-06, "loss": 0.2145, "step": 2450 }, { "epoch": 0.2888235294117647, "grad_norm": 6.623908996582031, "learning_rate": 3.023e-06, "loss": 0.2039, "step": 2455 }, { "epoch": 0.28941176470588237, "grad_norm": 5.79390811920166, "learning_rate": 3.0205e-06, "loss": 0.193, "step": 2460 }, { "epoch": 0.29, "grad_norm": 5.428816795349121, "learning_rate": 3.0179999999999997e-06, "loss": 0.2493, "step": 2465 }, { "epoch": 0.29058823529411765, "grad_norm": 5.027050018310547, "learning_rate": 3.0154999999999998e-06, "loss": 0.2132, "step": 2470 }, { "epoch": 0.2911764705882353, "grad_norm": 4.759785175323486, "learning_rate": 3.0129999999999998e-06, "loss": 0.2256, "step": 2475 }, { "epoch": 0.2917647058823529, "grad_norm": 4.683269023895264, "learning_rate": 3.0105e-06, "loss": 0.1997, "step": 2480 }, { "epoch": 0.2923529411764706, "grad_norm": 5.12821626663208, "learning_rate": 3.008e-06, "loss": 0.2117, "step": 2485 }, { "epoch": 0.29294117647058826, "grad_norm": 5.700200080871582, "learning_rate": 3.0055e-06, "loss": 0.2265, "step": 2490 }, { "epoch": 0.29352941176470587, "grad_norm": 4.457401752471924, "learning_rate": 3.003e-06, "loss": 0.2105, "step": 2495 }, { "epoch": 0.29411764705882354, "grad_norm": 4.90303897857666, "learning_rate": 3.0005e-06, "loss": 0.2133, "step": 2500 }, { "epoch": 0.29411764705882354, "eval_loss": 0.16619360446929932, "eval_runtime": 217.0613, "eval_samples_per_second": 8.122, "eval_steps_per_second": 2.032, "eval_wer": 9.879647925273936, "step": 2500 }, { "epoch": 0.29470588235294115, "grad_norm": 5.008613586425781, "learning_rate": 2.998e-06, "loss": 0.1984, "step": 2505 }, { "epoch": 0.2952941176470588, "grad_norm": 3.973461866378784, "learning_rate": 2.9954999999999995e-06, "loss": 0.203, "step": 2510 }, { "epoch": 0.2958823529411765, "grad_norm": 5.4730658531188965, "learning_rate": 2.9929999999999996e-06, "loss": 0.2143, "step": 2515 }, { "epoch": 0.2964705882352941, "grad_norm": 6.639011859893799, "learning_rate": 2.9905e-06, "loss": 0.2741, "step": 2520 }, { "epoch": 0.29705882352941176, "grad_norm": 4.806242942810059, "learning_rate": 2.988e-06, "loss": 0.3175, "step": 2525 }, { "epoch": 0.29764705882352943, "grad_norm": 4.492772102355957, "learning_rate": 2.9855e-06, "loss": 0.2676, "step": 2530 }, { "epoch": 0.29823529411764704, "grad_norm": 3.860215902328491, "learning_rate": 2.983e-06, "loss": 0.2938, "step": 2535 }, { "epoch": 0.2988235294117647, "grad_norm": 4.1058454513549805, "learning_rate": 2.9805e-06, "loss": 0.2602, "step": 2540 }, { "epoch": 0.2994117647058824, "grad_norm": 5.554664134979248, "learning_rate": 2.978e-06, "loss": 0.3559, "step": 2545 }, { "epoch": 0.3, "grad_norm": 4.1734490394592285, "learning_rate": 2.9754999999999997e-06, "loss": 0.3544, "step": 2550 }, { "epoch": 0.30058823529411766, "grad_norm": 4.297212600708008, "learning_rate": 2.9729999999999997e-06, "loss": 0.3786, "step": 2555 }, { "epoch": 0.30117647058823527, "grad_norm": 3.802435874938965, "learning_rate": 2.9704999999999998e-06, "loss": 0.3964, "step": 2560 }, { "epoch": 0.30176470588235293, "grad_norm": 4.643036842346191, "learning_rate": 2.968e-06, "loss": 0.4043, "step": 2565 }, { "epoch": 0.3023529411764706, "grad_norm": 5.035863876342773, "learning_rate": 2.9655e-06, "loss": 0.3489, "step": 2570 }, { "epoch": 0.3029411764705882, "grad_norm": 6.153038024902344, "learning_rate": 2.963e-06, "loss": 0.3202, "step": 2575 }, { "epoch": 0.3035294117647059, "grad_norm": 3.727543592453003, "learning_rate": 2.9605e-06, "loss": 0.2768, "step": 2580 }, { "epoch": 0.30411764705882355, "grad_norm": 5.493717193603516, "learning_rate": 2.958e-06, "loss": 0.2297, "step": 2585 }, { "epoch": 0.30470588235294116, "grad_norm": 5.340458869934082, "learning_rate": 2.9554999999999995e-06, "loss": 0.2466, "step": 2590 }, { "epoch": 0.3052941176470588, "grad_norm": 4.706632137298584, "learning_rate": 2.9529999999999995e-06, "loss": 0.2211, "step": 2595 }, { "epoch": 0.3058823529411765, "grad_norm": 5.6392364501953125, "learning_rate": 2.9505e-06, "loss": 0.24, "step": 2600 }, { "epoch": 0.3064705882352941, "grad_norm": 3.758432388305664, "learning_rate": 2.948e-06, "loss": 0.1658, "step": 2605 }, { "epoch": 0.3070588235294118, "grad_norm": 3.45859956741333, "learning_rate": 2.9455e-06, "loss": 0.1849, "step": 2610 }, { "epoch": 0.3076470588235294, "grad_norm": 5.421919822692871, "learning_rate": 2.943e-06, "loss": 0.2157, "step": 2615 }, { "epoch": 0.30823529411764705, "grad_norm": 4.73488712310791, "learning_rate": 2.9405e-06, "loss": 0.1977, "step": 2620 }, { "epoch": 0.3088235294117647, "grad_norm": 4.134047031402588, "learning_rate": 2.938e-06, "loss": 0.2151, "step": 2625 }, { "epoch": 0.30941176470588233, "grad_norm": 4.319257736206055, "learning_rate": 2.9355e-06, "loss": 0.1751, "step": 2630 }, { "epoch": 0.31, "grad_norm": 4.077624797821045, "learning_rate": 2.9329999999999997e-06, "loss": 0.1958, "step": 2635 }, { "epoch": 0.31058823529411766, "grad_norm": 5.969017028808594, "learning_rate": 2.9304999999999997e-06, "loss": 0.1912, "step": 2640 }, { "epoch": 0.3111764705882353, "grad_norm": 4.853809356689453, "learning_rate": 2.9279999999999997e-06, "loss": 0.2125, "step": 2645 }, { "epoch": 0.31176470588235294, "grad_norm": 5.439288139343262, "learning_rate": 2.9254999999999998e-06, "loss": 0.259, "step": 2650 }, { "epoch": 0.3123529411764706, "grad_norm": 3.9751577377319336, "learning_rate": 2.923e-06, "loss": 0.2665, "step": 2655 }, { "epoch": 0.3129411764705882, "grad_norm": 4.290473937988281, "learning_rate": 2.9205e-06, "loss": 0.344, "step": 2660 }, { "epoch": 0.3135294117647059, "grad_norm": 4.659896373748779, "learning_rate": 2.918e-06, "loss": 0.3378, "step": 2665 }, { "epoch": 0.31411764705882356, "grad_norm": 4.473114490509033, "learning_rate": 2.9155e-06, "loss": 0.3155, "step": 2670 }, { "epoch": 0.31470588235294117, "grad_norm": 4.87158203125, "learning_rate": 2.9129999999999995e-06, "loss": 0.3286, "step": 2675 }, { "epoch": 0.31529411764705884, "grad_norm": 4.1088643074035645, "learning_rate": 2.9105e-06, "loss": 0.3658, "step": 2680 }, { "epoch": 0.31588235294117645, "grad_norm": 4.646324157714844, "learning_rate": 2.908e-06, "loss": 0.3572, "step": 2685 }, { "epoch": 0.3164705882352941, "grad_norm": 4.924124717712402, "learning_rate": 2.9055e-06, "loss": 0.3327, "step": 2690 }, { "epoch": 0.3170588235294118, "grad_norm": 4.581195831298828, "learning_rate": 2.903e-06, "loss": 0.3564, "step": 2695 }, { "epoch": 0.3176470588235294, "grad_norm": 4.5321245193481445, "learning_rate": 2.9005e-06, "loss": 0.3345, "step": 2700 }, { "epoch": 0.31823529411764706, "grad_norm": 5.16433048248291, "learning_rate": 2.898e-06, "loss": 0.3524, "step": 2705 }, { "epoch": 0.31882352941176473, "grad_norm": 4.225367546081543, "learning_rate": 2.8955e-06, "loss": 0.4112, "step": 2710 }, { "epoch": 0.31941176470588234, "grad_norm": 3.7374627590179443, "learning_rate": 2.8929999999999997e-06, "loss": 0.2934, "step": 2715 }, { "epoch": 0.32, "grad_norm": 3.9707484245300293, "learning_rate": 2.8904999999999997e-06, "loss": 0.286, "step": 2720 }, { "epoch": 0.3205882352941177, "grad_norm": 3.880120038986206, "learning_rate": 2.8879999999999997e-06, "loss": 0.219, "step": 2725 }, { "epoch": 0.3211764705882353, "grad_norm": 5.055685043334961, "learning_rate": 2.8854999999999997e-06, "loss": 0.3275, "step": 2730 }, { "epoch": 0.32176470588235295, "grad_norm": 4.223024368286133, "learning_rate": 2.8829999999999998e-06, "loss": 0.2448, "step": 2735 }, { "epoch": 0.32235294117647056, "grad_norm": 4.628231048583984, "learning_rate": 2.8804999999999998e-06, "loss": 0.3246, "step": 2740 }, { "epoch": 0.32294117647058823, "grad_norm": 4.0539021492004395, "learning_rate": 2.878e-06, "loss": 0.2669, "step": 2745 }, { "epoch": 0.3235294117647059, "grad_norm": 3.4415056705474854, "learning_rate": 2.8755e-06, "loss": 0.2245, "step": 2750 }, { "epoch": 0.3241176470588235, "grad_norm": 3.7217116355895996, "learning_rate": 2.8730000000000003e-06, "loss": 0.1956, "step": 2755 }, { "epoch": 0.3247058823529412, "grad_norm": 4.077893257141113, "learning_rate": 2.8705e-06, "loss": 0.2036, "step": 2760 }, { "epoch": 0.32529411764705884, "grad_norm": 4.327847480773926, "learning_rate": 2.868e-06, "loss": 0.2508, "step": 2765 }, { "epoch": 0.32588235294117646, "grad_norm": 3.9320521354675293, "learning_rate": 2.8655e-06, "loss": 0.261, "step": 2770 }, { "epoch": 0.3264705882352941, "grad_norm": 3.6010005474090576, "learning_rate": 2.863e-06, "loss": 0.2509, "step": 2775 }, { "epoch": 0.3270588235294118, "grad_norm": 5.048023223876953, "learning_rate": 2.8605e-06, "loss": 0.2111, "step": 2780 }, { "epoch": 0.3276470588235294, "grad_norm": 4.343364238739014, "learning_rate": 2.858e-06, "loss": 0.1259, "step": 2785 }, { "epoch": 0.32823529411764707, "grad_norm": 3.8193576335906982, "learning_rate": 2.8555e-06, "loss": 0.1207, "step": 2790 }, { "epoch": 0.3288235294117647, "grad_norm": 4.694075584411621, "learning_rate": 2.853e-06, "loss": 0.1292, "step": 2795 }, { "epoch": 0.32941176470588235, "grad_norm": 4.638583660125732, "learning_rate": 2.8504999999999996e-06, "loss": 0.1158, "step": 2800 }, { "epoch": 0.33, "grad_norm": 3.3456101417541504, "learning_rate": 2.8479999999999997e-06, "loss": 0.1121, "step": 2805 }, { "epoch": 0.3305882352941176, "grad_norm": 3.9392271041870117, "learning_rate": 2.8454999999999997e-06, "loss": 0.0818, "step": 2810 }, { "epoch": 0.3311764705882353, "grad_norm": 4.873154640197754, "learning_rate": 2.8429999999999997e-06, "loss": 0.1114, "step": 2815 }, { "epoch": 0.33176470588235296, "grad_norm": 5.822360992431641, "learning_rate": 2.8404999999999997e-06, "loss": 0.1043, "step": 2820 }, { "epoch": 0.3323529411764706, "grad_norm": 5.192411422729492, "learning_rate": 2.8379999999999998e-06, "loss": 0.1204, "step": 2825 }, { "epoch": 0.33294117647058824, "grad_norm": 3.822308301925659, "learning_rate": 2.8355e-06, "loss": 0.078, "step": 2830 }, { "epoch": 0.3335294117647059, "grad_norm": 4.122225761413574, "learning_rate": 2.8330000000000002e-06, "loss": 0.0762, "step": 2835 }, { "epoch": 0.3341176470588235, "grad_norm": 6.4448628425598145, "learning_rate": 2.8305e-06, "loss": 0.156, "step": 2840 }, { "epoch": 0.3347058823529412, "grad_norm": 4.499393939971924, "learning_rate": 2.828e-06, "loss": 0.1499, "step": 2845 }, { "epoch": 0.3352941176470588, "grad_norm": 5.191849231719971, "learning_rate": 2.8255e-06, "loss": 0.1609, "step": 2850 }, { "epoch": 0.33588235294117647, "grad_norm": 7.126627445220947, "learning_rate": 2.823e-06, "loss": 0.2245, "step": 2855 }, { "epoch": 0.33647058823529413, "grad_norm": 5.2784857749938965, "learning_rate": 2.8205e-06, "loss": 0.1654, "step": 2860 }, { "epoch": 0.33705882352941174, "grad_norm": 5.031002998352051, "learning_rate": 2.818e-06, "loss": 0.202, "step": 2865 }, { "epoch": 0.3376470588235294, "grad_norm": 4.807315349578857, "learning_rate": 2.8155e-06, "loss": 0.1848, "step": 2870 }, { "epoch": 0.3382352941176471, "grad_norm": 4.608419895172119, "learning_rate": 2.813e-06, "loss": 0.207, "step": 2875 }, { "epoch": 0.3388235294117647, "grad_norm": 4.97172212600708, "learning_rate": 2.8105e-06, "loss": 0.2007, "step": 2880 }, { "epoch": 0.33941176470588236, "grad_norm": 4.8092217445373535, "learning_rate": 2.8079999999999996e-06, "loss": 0.2072, "step": 2885 }, { "epoch": 0.34, "grad_norm": 4.74199104309082, "learning_rate": 2.8054999999999997e-06, "loss": 0.1971, "step": 2890 }, { "epoch": 0.34058823529411764, "grad_norm": 6.263240814208984, "learning_rate": 2.8029999999999997e-06, "loss": 0.1989, "step": 2895 }, { "epoch": 0.3411764705882353, "grad_norm": 4.510291576385498, "learning_rate": 2.8004999999999997e-06, "loss": 0.1989, "step": 2900 }, { "epoch": 0.3417647058823529, "grad_norm": 5.854542255401611, "learning_rate": 2.7979999999999997e-06, "loss": 0.203, "step": 2905 }, { "epoch": 0.3423529411764706, "grad_norm": 4.230734825134277, "learning_rate": 2.7955e-06, "loss": 0.2183, "step": 2910 }, { "epoch": 0.34294117647058825, "grad_norm": 5.637038230895996, "learning_rate": 2.793e-06, "loss": 0.2241, "step": 2915 }, { "epoch": 0.34352941176470586, "grad_norm": 5.474928855895996, "learning_rate": 2.7905000000000002e-06, "loss": 0.1904, "step": 2920 }, { "epoch": 0.34411764705882353, "grad_norm": 5.342573165893555, "learning_rate": 2.788e-06, "loss": 0.2241, "step": 2925 }, { "epoch": 0.3447058823529412, "grad_norm": 4.708014965057373, "learning_rate": 2.7855e-06, "loss": 0.1727, "step": 2930 }, { "epoch": 0.3452941176470588, "grad_norm": 5.215411186218262, "learning_rate": 2.783e-06, "loss": 0.1947, "step": 2935 }, { "epoch": 0.3458823529411765, "grad_norm": 4.923801898956299, "learning_rate": 2.7805e-06, "loss": 0.1791, "step": 2940 }, { "epoch": 0.34647058823529414, "grad_norm": 4.910918712615967, "learning_rate": 2.778e-06, "loss": 0.193, "step": 2945 }, { "epoch": 0.34705882352941175, "grad_norm": 5.1346001625061035, "learning_rate": 2.7755e-06, "loss": 0.1611, "step": 2950 }, { "epoch": 0.3476470588235294, "grad_norm": 7.441149711608887, "learning_rate": 2.773e-06, "loss": 0.2066, "step": 2955 }, { "epoch": 0.34823529411764703, "grad_norm": 4.721667289733887, "learning_rate": 2.7705e-06, "loss": 0.2068, "step": 2960 }, { "epoch": 0.3488235294117647, "grad_norm": 5.0545735359191895, "learning_rate": 2.7679999999999996e-06, "loss": 0.1962, "step": 2965 }, { "epoch": 0.34941176470588237, "grad_norm": 5.365943908691406, "learning_rate": 2.7654999999999996e-06, "loss": 0.1867, "step": 2970 }, { "epoch": 1.0003529411764707, "grad_norm": 4.034170627593994, "learning_rate": 2.7629999999999996e-06, "loss": 0.3078, "step": 2975 }, { "epoch": 1.0009411764705882, "grad_norm": 3.3441267013549805, "learning_rate": 2.7604999999999997e-06, "loss": 0.296, "step": 2980 }, { "epoch": 1.0015294117647058, "grad_norm": 3.7983829975128174, "learning_rate": 2.7579999999999997e-06, "loss": 0.3596, "step": 2985 }, { "epoch": 1.0021176470588236, "grad_norm": 4.304010391235352, "learning_rate": 2.7555e-06, "loss": 0.2993, "step": 2990 }, { "epoch": 1.0027058823529411, "grad_norm": 3.8553977012634277, "learning_rate": 2.753e-06, "loss": 0.273, "step": 2995 }, { "epoch": 1.003294117647059, "grad_norm": 4.174501419067383, "learning_rate": 2.7505e-06, "loss": 0.2653, "step": 3000 }, { "epoch": 1.003294117647059, "eval_loss": 0.14667804539203644, "eval_runtime": 213.0903, "eval_samples_per_second": 8.273, "eval_steps_per_second": 2.07, "eval_wer": 9.583258487515717, "step": 3000 }, { "epoch": 1.0038823529411764, "grad_norm": 3.455491781234741, "learning_rate": 2.748e-06, "loss": 0.2995, "step": 3005 }, { "epoch": 1.0044705882352942, "grad_norm": 4.2290215492248535, "learning_rate": 2.7455e-06, "loss": 0.3157, "step": 3010 }, { "epoch": 1.0050588235294118, "grad_norm": 3.5927371978759766, "learning_rate": 2.743e-06, "loss": 0.2511, "step": 3015 }, { "epoch": 1.0056470588235293, "grad_norm": 4.1396803855896, "learning_rate": 2.7405e-06, "loss": 0.3035, "step": 3020 }, { "epoch": 1.0062352941176471, "grad_norm": 4.662309646606445, "learning_rate": 2.738e-06, "loss": 0.3241, "step": 3025 }, { "epoch": 1.0068235294117647, "grad_norm": 4.161059379577637, "learning_rate": 2.7355e-06, "loss": 0.3152, "step": 3030 }, { "epoch": 1.0074117647058825, "grad_norm": 3.3734641075134277, "learning_rate": 2.733e-06, "loss": 0.2084, "step": 3035 }, { "epoch": 1.008, "grad_norm": 5.047889232635498, "learning_rate": 2.7305e-06, "loss": 0.1924, "step": 3040 }, { "epoch": 1.0085882352941176, "grad_norm": 4.436681747436523, "learning_rate": 2.728e-06, "loss": 0.1911, "step": 3045 }, { "epoch": 1.0091764705882353, "grad_norm": 4.171289443969727, "learning_rate": 2.7254999999999996e-06, "loss": 0.2308, "step": 3050 }, { "epoch": 1.009764705882353, "grad_norm": 4.373075485229492, "learning_rate": 2.7229999999999996e-06, "loss": 0.1359, "step": 3055 }, { "epoch": 1.0103529411764707, "grad_norm": 5.466518878936768, "learning_rate": 2.7204999999999996e-06, "loss": 0.1997, "step": 3060 }, { "epoch": 1.0109411764705882, "grad_norm": 4.17453145980835, "learning_rate": 2.7179999999999996e-06, "loss": 0.2215, "step": 3065 }, { "epoch": 1.0115294117647058, "grad_norm": 5.046319484710693, "learning_rate": 2.7155e-06, "loss": 0.1638, "step": 3070 }, { "epoch": 1.0121176470588236, "grad_norm": 4.6446213722229, "learning_rate": 2.713e-06, "loss": 0.19, "step": 3075 }, { "epoch": 1.0127058823529411, "grad_norm": 4.730209827423096, "learning_rate": 2.7105e-06, "loss": 0.1814, "step": 3080 }, { "epoch": 1.013294117647059, "grad_norm": 3.889785051345825, "learning_rate": 2.708e-06, "loss": 0.1764, "step": 3085 }, { "epoch": 1.0138823529411765, "grad_norm": 4.95405387878418, "learning_rate": 2.7054999999999998e-06, "loss": 0.2288, "step": 3090 }, { "epoch": 1.014470588235294, "grad_norm": 3.5910961627960205, "learning_rate": 2.7029999999999998e-06, "loss": 0.2303, "step": 3095 }, { "epoch": 1.0150588235294118, "grad_norm": 4.015872001647949, "learning_rate": 2.7005e-06, "loss": 0.2643, "step": 3100 }, { "epoch": 1.0156470588235293, "grad_norm": 5.291968822479248, "learning_rate": 2.698e-06, "loss": 0.2544, "step": 3105 }, { "epoch": 1.0162352941176471, "grad_norm": 3.9181082248687744, "learning_rate": 2.6955e-06, "loss": 0.2789, "step": 3110 }, { "epoch": 1.0168235294117647, "grad_norm": 4.120726585388184, "learning_rate": 2.693e-06, "loss": 0.2971, "step": 3115 }, { "epoch": 1.0174117647058825, "grad_norm": 3.8018078804016113, "learning_rate": 2.6905e-06, "loss": 0.2651, "step": 3120 }, { "epoch": 1.018, "grad_norm": 4.393831253051758, "learning_rate": 2.688e-06, "loss": 0.2676, "step": 3125 }, { "epoch": 1.0185882352941176, "grad_norm": 4.070085525512695, "learning_rate": 2.6855e-06, "loss": 0.3017, "step": 3130 }, { "epoch": 1.0191764705882354, "grad_norm": 4.3897905349731445, "learning_rate": 2.6829999999999996e-06, "loss": 0.3384, "step": 3135 }, { "epoch": 1.019764705882353, "grad_norm": 4.135372638702393, "learning_rate": 2.6804999999999996e-06, "loss": 0.3197, "step": 3140 }, { "epoch": 1.0203529411764707, "grad_norm": 3.8027613162994385, "learning_rate": 2.678e-06, "loss": 0.2773, "step": 3145 }, { "epoch": 1.0209411764705882, "grad_norm": 3.2393133640289307, "learning_rate": 2.6755e-06, "loss": 0.2917, "step": 3150 }, { "epoch": 1.0215294117647058, "grad_norm": 3.6308064460754395, "learning_rate": 2.673e-06, "loss": 0.2456, "step": 3155 }, { "epoch": 1.0221176470588236, "grad_norm": 3.6334359645843506, "learning_rate": 2.6705e-06, "loss": 0.2337, "step": 3160 }, { "epoch": 1.0227058823529411, "grad_norm": 4.518722057342529, "learning_rate": 2.668e-06, "loss": 0.241, "step": 3165 }, { "epoch": 1.023294117647059, "grad_norm": 4.979987144470215, "learning_rate": 2.6655e-06, "loss": 0.2227, "step": 3170 }, { "epoch": 1.0238823529411765, "grad_norm": 3.936903953552246, "learning_rate": 2.6629999999999997e-06, "loss": 0.19, "step": 3175 }, { "epoch": 1.024470588235294, "grad_norm": 4.249811172485352, "learning_rate": 2.6604999999999998e-06, "loss": 0.1637, "step": 3180 }, { "epoch": 1.0250588235294118, "grad_norm": 4.781577110290527, "learning_rate": 2.658e-06, "loss": 0.1766, "step": 3185 }, { "epoch": 1.0256470588235294, "grad_norm": 3.753575325012207, "learning_rate": 2.6555e-06, "loss": 0.1422, "step": 3190 }, { "epoch": 1.0262352941176471, "grad_norm": 4.361690044403076, "learning_rate": 2.653e-06, "loss": 0.174, "step": 3195 }, { "epoch": 1.0268235294117647, "grad_norm": 3.8064029216766357, "learning_rate": 2.6505e-06, "loss": 0.1594, "step": 3200 }, { "epoch": 1.0274117647058822, "grad_norm": 4.04011869430542, "learning_rate": 2.648e-06, "loss": 0.1615, "step": 3205 }, { "epoch": 1.028, "grad_norm": 3.3099052906036377, "learning_rate": 2.6455e-06, "loss": 0.1616, "step": 3210 }, { "epoch": 1.0285882352941176, "grad_norm": 3.631289005279541, "learning_rate": 2.6429999999999995e-06, "loss": 0.1398, "step": 3215 }, { "epoch": 1.0291764705882354, "grad_norm": 5.033906936645508, "learning_rate": 2.6404999999999995e-06, "loss": 0.1676, "step": 3220 }, { "epoch": 1.029764705882353, "grad_norm": 3.927427053451538, "learning_rate": 2.638e-06, "loss": 0.1474, "step": 3225 }, { "epoch": 1.0303529411764707, "grad_norm": 4.770748138427734, "learning_rate": 2.6355e-06, "loss": 0.1619, "step": 3230 }, { "epoch": 1.0309411764705882, "grad_norm": 4.601754665374756, "learning_rate": 2.633e-06, "loss": 0.1732, "step": 3235 }, { "epoch": 1.0315294117647058, "grad_norm": 5.260937690734863, "learning_rate": 2.6305e-06, "loss": 0.1731, "step": 3240 }, { "epoch": 1.0321176470588236, "grad_norm": 4.216193199157715, "learning_rate": 2.628e-06, "loss": 0.1479, "step": 3245 }, { "epoch": 1.0327058823529411, "grad_norm": 4.190891265869141, "learning_rate": 2.6255e-06, "loss": 0.1635, "step": 3250 }, { "epoch": 1.033294117647059, "grad_norm": 4.654452323913574, "learning_rate": 2.623e-06, "loss": 0.1705, "step": 3255 }, { "epoch": 1.0338823529411765, "grad_norm": 4.3641557693481445, "learning_rate": 2.6204999999999997e-06, "loss": 0.1533, "step": 3260 }, { "epoch": 1.034470588235294, "grad_norm": 4.996956825256348, "learning_rate": 2.6179999999999998e-06, "loss": 0.1689, "step": 3265 }, { "epoch": 1.0350588235294118, "grad_norm": 4.303079605102539, "learning_rate": 2.6154999999999998e-06, "loss": 0.158, "step": 3270 }, { "epoch": 1.0356470588235294, "grad_norm": 4.046682357788086, "learning_rate": 2.613e-06, "loss": 0.1868, "step": 3275 }, { "epoch": 1.0362352941176471, "grad_norm": 4.775552749633789, "learning_rate": 2.6105e-06, "loss": 0.2023, "step": 3280 }, { "epoch": 1.0368235294117647, "grad_norm": 4.558648109436035, "learning_rate": 2.608e-06, "loss": 0.2905, "step": 3285 }, { "epoch": 1.0374117647058823, "grad_norm": 4.280534744262695, "learning_rate": 2.6055e-06, "loss": 0.2356, "step": 3290 }, { "epoch": 1.038, "grad_norm": 4.513224124908447, "learning_rate": 2.603e-06, "loss": 0.2785, "step": 3295 }, { "epoch": 1.0385882352941176, "grad_norm": 4.837095737457275, "learning_rate": 2.6004999999999995e-06, "loss": 0.2671, "step": 3300 }, { "epoch": 1.0391764705882354, "grad_norm": 4.992514133453369, "learning_rate": 2.598e-06, "loss": 0.2885, "step": 3305 }, { "epoch": 1.039764705882353, "grad_norm": 4.299206733703613, "learning_rate": 2.5955e-06, "loss": 0.3128, "step": 3310 }, { "epoch": 1.0403529411764705, "grad_norm": 4.676679611206055, "learning_rate": 2.593e-06, "loss": 0.294, "step": 3315 }, { "epoch": 1.0409411764705883, "grad_norm": 4.482694625854492, "learning_rate": 2.5905e-06, "loss": 0.3051, "step": 3320 }, { "epoch": 1.0415294117647058, "grad_norm": 3.8015661239624023, "learning_rate": 2.588e-06, "loss": 0.3483, "step": 3325 }, { "epoch": 1.0421176470588236, "grad_norm": 4.020302772521973, "learning_rate": 2.5855e-06, "loss": 0.2885, "step": 3330 }, { "epoch": 1.0427058823529411, "grad_norm": 5.928913116455078, "learning_rate": 2.583e-06, "loss": 0.3076, "step": 3335 }, { "epoch": 1.043294117647059, "grad_norm": 4.010854721069336, "learning_rate": 2.5804999999999997e-06, "loss": 0.2727, "step": 3340 }, { "epoch": 1.0438823529411765, "grad_norm": 3.748196840286255, "learning_rate": 2.5779999999999997e-06, "loss": 0.2638, "step": 3345 }, { "epoch": 1.044470588235294, "grad_norm": 5.045647144317627, "learning_rate": 2.5754999999999997e-06, "loss": 0.2554, "step": 3350 }, { "epoch": 1.0450588235294118, "grad_norm": 3.638298749923706, "learning_rate": 2.5729999999999998e-06, "loss": 0.2272, "step": 3355 }, { "epoch": 1.0456470588235294, "grad_norm": 4.550037860870361, "learning_rate": 2.5704999999999998e-06, "loss": 0.2023, "step": 3360 }, { "epoch": 1.0462352941176472, "grad_norm": 5.605539798736572, "learning_rate": 2.568e-06, "loss": 0.2118, "step": 3365 }, { "epoch": 1.0468235294117647, "grad_norm": 3.8483738899230957, "learning_rate": 2.5655e-06, "loss": 0.1768, "step": 3370 }, { "epoch": 1.0474117647058823, "grad_norm": 4.328429698944092, "learning_rate": 2.563e-06, "loss": 0.1831, "step": 3375 }, { "epoch": 1.048, "grad_norm": 3.53432035446167, "learning_rate": 2.5605000000000003e-06, "loss": 0.1453, "step": 3380 }, { "epoch": 1.0485882352941176, "grad_norm": 5.663638591766357, "learning_rate": 2.558e-06, "loss": 0.1675, "step": 3385 }, { "epoch": 1.0491764705882354, "grad_norm": 3.8146209716796875, "learning_rate": 2.5555e-06, "loss": 0.1436, "step": 3390 }, { "epoch": 1.049764705882353, "grad_norm": 4.633802890777588, "learning_rate": 2.553e-06, "loss": 0.1389, "step": 3395 }, { "epoch": 1.0503529411764705, "grad_norm": 4.166659355163574, "learning_rate": 2.5505e-06, "loss": 0.1411, "step": 3400 }, { "epoch": 1.0509411764705883, "grad_norm": 4.166285514831543, "learning_rate": 2.548e-06, "loss": 0.1577, "step": 3405 }, { "epoch": 1.0515294117647058, "grad_norm": 5.021528244018555, "learning_rate": 2.5455e-06, "loss": 0.1521, "step": 3410 }, { "epoch": 1.0521176470588236, "grad_norm": 4.446382522583008, "learning_rate": 2.543e-06, "loss": 0.1748, "step": 3415 }, { "epoch": 1.0527058823529412, "grad_norm": 4.952483177185059, "learning_rate": 2.5405e-06, "loss": 0.1555, "step": 3420 }, { "epoch": 1.053294117647059, "grad_norm": 4.417530536651611, "learning_rate": 2.5379999999999997e-06, "loss": 0.1711, "step": 3425 }, { "epoch": 1.0538823529411765, "grad_norm": 5.956252098083496, "learning_rate": 2.5354999999999997e-06, "loss": 0.1773, "step": 3430 }, { "epoch": 1.054470588235294, "grad_norm": 3.5904104709625244, "learning_rate": 2.5329999999999997e-06, "loss": 0.1468, "step": 3435 }, { "epoch": 1.0550588235294118, "grad_norm": 4.414221286773682, "learning_rate": 2.5304999999999997e-06, "loss": 0.1491, "step": 3440 }, { "epoch": 1.0556470588235294, "grad_norm": 4.325644016265869, "learning_rate": 2.5279999999999998e-06, "loss": 0.1586, "step": 3445 }, { "epoch": 1.0562352941176472, "grad_norm": 3.725940465927124, "learning_rate": 2.5255e-06, "loss": 0.1585, "step": 3450 }, { "epoch": 1.0568235294117647, "grad_norm": 4.274308681488037, "learning_rate": 2.523e-06, "loss": 0.1771, "step": 3455 }, { "epoch": 1.0574117647058823, "grad_norm": 4.317328929901123, "learning_rate": 2.5205000000000003e-06, "loss": 0.1654, "step": 3460 }, { "epoch": 1.058, "grad_norm": 3.8634958267211914, "learning_rate": 2.518e-06, "loss": 0.1636, "step": 3465 }, { "epoch": 1.0585882352941176, "grad_norm": 4.37199592590332, "learning_rate": 2.5155e-06, "loss": 0.1579, "step": 3470 }, { "epoch": 1.0591764705882354, "grad_norm": 3.3441450595855713, "learning_rate": 2.513e-06, "loss": 0.1262, "step": 3475 }, { "epoch": 1.059764705882353, "grad_norm": 4.3411407470703125, "learning_rate": 2.5105e-06, "loss": 0.1475, "step": 3480 }, { "epoch": 1.0603529411764705, "grad_norm": 5.101006031036377, "learning_rate": 2.508e-06, "loss": 0.1945, "step": 3485 }, { "epoch": 1.0609411764705883, "grad_norm": 4.627914905548096, "learning_rate": 2.5055e-06, "loss": 0.1533, "step": 3490 }, { "epoch": 1.0615294117647058, "grad_norm": 4.738828182220459, "learning_rate": 2.503e-06, "loss": 0.1764, "step": 3495 }, { "epoch": 1.0621176470588236, "grad_norm": 4.556647300720215, "learning_rate": 2.5005e-06, "loss": 0.1451, "step": 3500 }, { "epoch": 1.0621176470588236, "eval_loss": 0.15932495892047882, "eval_runtime": 215.7949, "eval_samples_per_second": 8.17, "eval_steps_per_second": 2.044, "eval_wer": 9.978444404526675, "step": 3500 }, { "epoch": 1.0627058823529412, "grad_norm": 4.308196544647217, "learning_rate": 2.498e-06, "loss": 0.1502, "step": 3505 }, { "epoch": 1.0632941176470587, "grad_norm": 4.2380547523498535, "learning_rate": 2.4954999999999997e-06, "loss": 0.1619, "step": 3510 }, { "epoch": 1.0638823529411765, "grad_norm": 4.075558662414551, "learning_rate": 2.4929999999999997e-06, "loss": 0.1404, "step": 3515 }, { "epoch": 1.064470588235294, "grad_norm": 4.336547374725342, "learning_rate": 2.4904999999999997e-06, "loss": 0.1354, "step": 3520 }, { "epoch": 1.0650588235294118, "grad_norm": 4.5000457763671875, "learning_rate": 2.4879999999999997e-06, "loss": 0.1425, "step": 3525 }, { "epoch": 1.0656470588235294, "grad_norm": 4.4154372215271, "learning_rate": 2.4854999999999998e-06, "loss": 0.1374, "step": 3530 }, { "epoch": 1.066235294117647, "grad_norm": 5.808067798614502, "learning_rate": 2.4829999999999998e-06, "loss": 0.1501, "step": 3535 }, { "epoch": 1.0668235294117647, "grad_norm": 4.223494052886963, "learning_rate": 2.4805000000000002e-06, "loss": 0.1489, "step": 3540 }, { "epoch": 1.0674117647058823, "grad_norm": 4.261801719665527, "learning_rate": 2.4780000000000002e-06, "loss": 0.137, "step": 3545 }, { "epoch": 1.068, "grad_norm": 4.229691982269287, "learning_rate": 2.4755e-06, "loss": 0.1376, "step": 3550 }, { "epoch": 1.0685882352941176, "grad_norm": 4.113494396209717, "learning_rate": 2.473e-06, "loss": 0.1268, "step": 3555 }, { "epoch": 1.0691764705882354, "grad_norm": 4.417797565460205, "learning_rate": 2.4705e-06, "loss": 0.1191, "step": 3560 }, { "epoch": 1.069764705882353, "grad_norm": 4.180721282958984, "learning_rate": 2.468e-06, "loss": 0.118, "step": 3565 }, { "epoch": 1.0703529411764705, "grad_norm": 3.4688456058502197, "learning_rate": 2.4655e-06, "loss": 0.1141, "step": 3570 }, { "epoch": 1.0709411764705883, "grad_norm": 3.9118638038635254, "learning_rate": 2.463e-06, "loss": 0.1205, "step": 3575 }, { "epoch": 1.0715294117647058, "grad_norm": 3.487636089324951, "learning_rate": 2.4605e-06, "loss": 0.1049, "step": 3580 }, { "epoch": 1.0721176470588236, "grad_norm": 4.013040542602539, "learning_rate": 2.458e-06, "loss": 0.1064, "step": 3585 }, { "epoch": 1.0727058823529412, "grad_norm": 3.592228651046753, "learning_rate": 2.4554999999999996e-06, "loss": 0.1163, "step": 3590 }, { "epoch": 1.0732941176470587, "grad_norm": 3.5233852863311768, "learning_rate": 2.4529999999999996e-06, "loss": 0.1069, "step": 3595 }, { "epoch": 1.0738823529411765, "grad_norm": 4.50062894821167, "learning_rate": 2.4504999999999997e-06, "loss": 0.1552, "step": 3600 }, { "epoch": 1.074470588235294, "grad_norm": 3.49955153465271, "learning_rate": 2.4479999999999997e-06, "loss": 0.1319, "step": 3605 }, { "epoch": 1.0750588235294118, "grad_norm": 3.575005531311035, "learning_rate": 2.4454999999999997e-06, "loss": 0.1551, "step": 3610 }, { "epoch": 1.0756470588235294, "grad_norm": 4.009887218475342, "learning_rate": 2.443e-06, "loss": 0.1361, "step": 3615 }, { "epoch": 1.076235294117647, "grad_norm": 4.531233310699463, "learning_rate": 2.4405e-06, "loss": 0.1438, "step": 3620 }, { "epoch": 1.0768235294117647, "grad_norm": 3.922947645187378, "learning_rate": 2.438e-06, "loss": 0.1728, "step": 3625 }, { "epoch": 1.0774117647058823, "grad_norm": 3.781895399093628, "learning_rate": 2.4355000000000002e-06, "loss": 0.1431, "step": 3630 }, { "epoch": 1.078, "grad_norm": 4.09556770324707, "learning_rate": 2.433e-06, "loss": 0.1543, "step": 3635 }, { "epoch": 1.0785882352941176, "grad_norm": 4.337367534637451, "learning_rate": 2.4305e-06, "loss": 0.1404, "step": 3640 }, { "epoch": 1.0791764705882354, "grad_norm": 4.532893657684326, "learning_rate": 2.428e-06, "loss": 0.1423, "step": 3645 }, { "epoch": 1.079764705882353, "grad_norm": 4.41027307510376, "learning_rate": 2.4255e-06, "loss": 0.1691, "step": 3650 }, { "epoch": 1.0803529411764705, "grad_norm": 4.195791244506836, "learning_rate": 2.423e-06, "loss": 0.1696, "step": 3655 }, { "epoch": 1.0809411764705883, "grad_norm": 4.566033363342285, "learning_rate": 2.4205e-06, "loss": 0.2358, "step": 3660 }, { "epoch": 1.0815294117647059, "grad_norm": 4.0658369064331055, "learning_rate": 2.418e-06, "loss": 0.2393, "step": 3665 }, { "epoch": 1.0821176470588236, "grad_norm": 4.366953372955322, "learning_rate": 2.4155e-06, "loss": 0.3222, "step": 3670 }, { "epoch": 1.0827058823529412, "grad_norm": 4.6110358238220215, "learning_rate": 2.4129999999999996e-06, "loss": 0.2485, "step": 3675 }, { "epoch": 1.0832941176470587, "grad_norm": 4.712707042694092, "learning_rate": 2.4104999999999996e-06, "loss": 0.2668, "step": 3680 }, { "epoch": 1.0838823529411765, "grad_norm": 4.25661563873291, "learning_rate": 2.4079999999999996e-06, "loss": 0.2675, "step": 3685 }, { "epoch": 1.084470588235294, "grad_norm": 3.6329283714294434, "learning_rate": 2.4054999999999997e-06, "loss": 0.2995, "step": 3690 }, { "epoch": 1.0850588235294119, "grad_norm": 3.8797123432159424, "learning_rate": 2.403e-06, "loss": 0.2781, "step": 3695 }, { "epoch": 1.0856470588235294, "grad_norm": 4.574522495269775, "learning_rate": 2.4005e-06, "loss": 0.2858, "step": 3700 }, { "epoch": 1.086235294117647, "grad_norm": 3.450948476791382, "learning_rate": 2.398e-06, "loss": 0.3248, "step": 3705 }, { "epoch": 1.0868235294117647, "grad_norm": 3.6924803256988525, "learning_rate": 2.3955e-06, "loss": 0.3167, "step": 3710 }, { "epoch": 1.0874117647058823, "grad_norm": 4.430237770080566, "learning_rate": 2.393e-06, "loss": 0.2963, "step": 3715 }, { "epoch": 1.088, "grad_norm": 4.105355739593506, "learning_rate": 2.3905e-06, "loss": 0.2903, "step": 3720 }, { "epoch": 1.0885882352941176, "grad_norm": 5.082009792327881, "learning_rate": 2.388e-06, "loss": 0.2702, "step": 3725 }, { "epoch": 1.0891764705882352, "grad_norm": 3.320929527282715, "learning_rate": 2.3855e-06, "loss": 0.2324, "step": 3730 }, { "epoch": 1.089764705882353, "grad_norm": 4.398338317871094, "learning_rate": 2.383e-06, "loss": 0.196, "step": 3735 }, { "epoch": 1.0903529411764705, "grad_norm": 4.047602653503418, "learning_rate": 2.3805e-06, "loss": 0.2336, "step": 3740 }, { "epoch": 1.0909411764705883, "grad_norm": 3.261476516723633, "learning_rate": 2.378e-06, "loss": 0.154, "step": 3745 }, { "epoch": 1.0915294117647059, "grad_norm": 3.5284078121185303, "learning_rate": 2.3755e-06, "loss": 0.1674, "step": 3750 }, { "epoch": 1.0921176470588234, "grad_norm": 3.6257030963897705, "learning_rate": 2.373e-06, "loss": 0.1727, "step": 3755 }, { "epoch": 1.0927058823529412, "grad_norm": 4.000799655914307, "learning_rate": 2.3704999999999996e-06, "loss": 0.2556, "step": 3760 }, { "epoch": 1.0932941176470587, "grad_norm": 3.568948268890381, "learning_rate": 2.3679999999999996e-06, "loss": 0.1906, "step": 3765 }, { "epoch": 1.0938823529411765, "grad_norm": 4.486367225646973, "learning_rate": 2.3654999999999996e-06, "loss": 0.2912, "step": 3770 }, { "epoch": 1.094470588235294, "grad_norm": 3.677028179168701, "learning_rate": 2.363e-06, "loss": 0.226, "step": 3775 }, { "epoch": 1.0950588235294119, "grad_norm": 3.3966293334960938, "learning_rate": 2.3605e-06, "loss": 0.2831, "step": 3780 }, { "epoch": 1.0956470588235294, "grad_norm": 4.265522003173828, "learning_rate": 2.358e-06, "loss": 0.2535, "step": 3785 }, { "epoch": 1.096235294117647, "grad_norm": 3.657273292541504, "learning_rate": 2.3555e-06, "loss": 0.2499, "step": 3790 }, { "epoch": 1.0968235294117648, "grad_norm": 4.445555686950684, "learning_rate": 2.353e-06, "loss": 0.223, "step": 3795 }, { "epoch": 1.0974117647058823, "grad_norm": 3.9252309799194336, "learning_rate": 2.3504999999999998e-06, "loss": 0.1731, "step": 3800 }, { "epoch": 1.098, "grad_norm": 4.720244407653809, "learning_rate": 2.348e-06, "loss": 0.1582, "step": 3805 }, { "epoch": 1.0985882352941176, "grad_norm": 4.342854976654053, "learning_rate": 2.3455e-06, "loss": 0.1718, "step": 3810 }, { "epoch": 1.0991764705882352, "grad_norm": 4.804520130157471, "learning_rate": 2.343e-06, "loss": 0.1616, "step": 3815 }, { "epoch": 1.099764705882353, "grad_norm": 4.007386207580566, "learning_rate": 2.3405e-06, "loss": 0.1355, "step": 3820 }, { "epoch": 1.1003529411764705, "grad_norm": 4.7735700607299805, "learning_rate": 2.338e-06, "loss": 0.1475, "step": 3825 }, { "epoch": 1.1009411764705883, "grad_norm": 5.141053676605225, "learning_rate": 2.3355e-06, "loss": 0.1657, "step": 3830 }, { "epoch": 1.1015294117647059, "grad_norm": 3.199216842651367, "learning_rate": 2.333e-06, "loss": 0.1601, "step": 3835 }, { "epoch": 1.1021176470588236, "grad_norm": 4.165964126586914, "learning_rate": 2.3304999999999995e-06, "loss": 0.1385, "step": 3840 }, { "epoch": 1.1027058823529412, "grad_norm": 3.961205005645752, "learning_rate": 2.3279999999999996e-06, "loss": 0.1377, "step": 3845 }, { "epoch": 1.1032941176470588, "grad_norm": 4.8615288734436035, "learning_rate": 2.3254999999999996e-06, "loss": 0.1717, "step": 3850 }, { "epoch": 1.1038823529411765, "grad_norm": 5.0563483238220215, "learning_rate": 2.323e-06, "loss": 0.196, "step": 3855 }, { "epoch": 1.104470588235294, "grad_norm": 5.792454242706299, "learning_rate": 2.3205e-06, "loss": 0.204, "step": 3860 }, { "epoch": 1.1050588235294119, "grad_norm": 4.120100021362305, "learning_rate": 2.318e-06, "loss": 0.1594, "step": 3865 }, { "epoch": 1.1056470588235294, "grad_norm": 3.6119465827941895, "learning_rate": 2.3155e-06, "loss": 0.1444, "step": 3870 }, { "epoch": 1.106235294117647, "grad_norm": 3.456319808959961, "learning_rate": 2.313e-06, "loss": 0.1784, "step": 3875 }, { "epoch": 1.1068235294117648, "grad_norm": 4.416508197784424, "learning_rate": 2.3105e-06, "loss": 0.1604, "step": 3880 }, { "epoch": 1.1074117647058823, "grad_norm": 2.9513192176818848, "learning_rate": 2.3079999999999998e-06, "loss": 0.1554, "step": 3885 }, { "epoch": 1.108, "grad_norm": 3.351963520050049, "learning_rate": 2.3054999999999998e-06, "loss": 0.1389, "step": 3890 }, { "epoch": 1.1085882352941177, "grad_norm": 3.828166961669922, "learning_rate": 2.303e-06, "loss": 0.1411, "step": 3895 }, { "epoch": 1.1091764705882352, "grad_norm": 3.375274896621704, "learning_rate": 2.3005e-06, "loss": 0.1713, "step": 3900 }, { "epoch": 1.109764705882353, "grad_norm": 2.9637627601623535, "learning_rate": 2.298e-06, "loss": 0.1605, "step": 3905 }, { "epoch": 1.1103529411764705, "grad_norm": 3.616248846054077, "learning_rate": 2.2955e-06, "loss": 0.1484, "step": 3910 }, { "epoch": 1.1109411764705883, "grad_norm": 3.7091968059539795, "learning_rate": 2.293e-06, "loss": 0.1889, "step": 3915 }, { "epoch": 1.1115294117647059, "grad_norm": 3.6696510314941406, "learning_rate": 2.2905e-06, "loss": 0.1356, "step": 3920 }, { "epoch": 1.1121176470588234, "grad_norm": 3.671050548553467, "learning_rate": 2.2879999999999995e-06, "loss": 0.1549, "step": 3925 }, { "epoch": 1.1127058823529412, "grad_norm": 3.644904375076294, "learning_rate": 2.2855e-06, "loss": 0.1538, "step": 3930 }, { "epoch": 1.1132941176470588, "grad_norm": 3.9364736080169678, "learning_rate": 2.283e-06, "loss": 0.137, "step": 3935 }, { "epoch": 1.1138823529411765, "grad_norm": 4.868730068206787, "learning_rate": 2.2805e-06, "loss": 0.1356, "step": 3940 }, { "epoch": 1.114470588235294, "grad_norm": 4.622010231018066, "learning_rate": 2.278e-06, "loss": 0.124, "step": 3945 }, { "epoch": 1.1150588235294117, "grad_norm": 4.1397294998168945, "learning_rate": 2.2755e-06, "loss": 0.149, "step": 3950 }, { "epoch": 1.1156470588235294, "grad_norm": 4.465428352355957, "learning_rate": 2.273e-06, "loss": 0.1405, "step": 3955 }, { "epoch": 1.116235294117647, "grad_norm": 4.3719377517700195, "learning_rate": 2.2705e-06, "loss": 0.1529, "step": 3960 }, { "epoch": 1.1168235294117648, "grad_norm": 5.118681907653809, "learning_rate": 2.2679999999999997e-06, "loss": 0.1533, "step": 3965 }, { "epoch": 1.1174117647058823, "grad_norm": 4.24510383605957, "learning_rate": 2.2654999999999997e-06, "loss": 0.1718, "step": 3970 }, { "epoch": 1.1179999999999999, "grad_norm": 4.646854400634766, "learning_rate": 2.2629999999999998e-06, "loss": 0.2027, "step": 3975 }, { "epoch": 1.1185882352941177, "grad_norm": 3.4408071041107178, "learning_rate": 2.2604999999999998e-06, "loss": 0.2042, "step": 3980 }, { "epoch": 1.1191764705882352, "grad_norm": 4.6176323890686035, "learning_rate": 2.258e-06, "loss": 0.2355, "step": 3985 }, { "epoch": 1.119764705882353, "grad_norm": 4.826862335205078, "learning_rate": 2.2555e-06, "loss": 0.2706, "step": 3990 }, { "epoch": 1.1203529411764706, "grad_norm": 4.194958209991455, "learning_rate": 2.253e-06, "loss": 0.3052, "step": 3995 }, { "epoch": 1.1209411764705883, "grad_norm": 4.547854423522949, "learning_rate": 2.2505e-06, "loss": 0.2961, "step": 4000 }, { "epoch": 1.1209411764705883, "eval_loss": 0.15007220208644867, "eval_runtime": 216.2631, "eval_samples_per_second": 8.152, "eval_steps_per_second": 2.039, "eval_wer": 9.753906951679541, "step": 4000 }, { "epoch": 1.1215294117647059, "grad_norm": 4.23368501663208, "learning_rate": 2.2480000000000003e-06, "loss": 0.2152, "step": 4005 }, { "epoch": 1.1221176470588234, "grad_norm": 4.222485065460205, "learning_rate": 2.2455e-06, "loss": 0.2929, "step": 4010 }, { "epoch": 1.1227058823529412, "grad_norm": 4.035862922668457, "learning_rate": 2.243e-06, "loss": 0.2845, "step": 4015 }, { "epoch": 1.1232941176470588, "grad_norm": 4.764565944671631, "learning_rate": 2.2405e-06, "loss": 0.2837, "step": 4020 }, { "epoch": 1.1238823529411766, "grad_norm": 4.525498867034912, "learning_rate": 2.238e-06, "loss": 0.2647, "step": 4025 }, { "epoch": 1.124470588235294, "grad_norm": 4.916162967681885, "learning_rate": 2.2355e-06, "loss": 0.2495, "step": 4030 }, { "epoch": 1.1250588235294119, "grad_norm": 4.017928123474121, "learning_rate": 2.233e-06, "loss": 0.1924, "step": 4035 }, { "epoch": 1.1256470588235294, "grad_norm": 4.768874168395996, "learning_rate": 2.2305e-06, "loss": 0.2282, "step": 4040 }, { "epoch": 1.126235294117647, "grad_norm": 5.6621413230896, "learning_rate": 2.228e-06, "loss": 0.197, "step": 4045 }, { "epoch": 1.1268235294117648, "grad_norm": 4.481237411499023, "learning_rate": 2.2254999999999997e-06, "loss": 0.1669, "step": 4050 }, { "epoch": 1.1274117647058823, "grad_norm": 4.188382625579834, "learning_rate": 2.2229999999999997e-06, "loss": 0.1551, "step": 4055 }, { "epoch": 1.1280000000000001, "grad_norm": 4.246347904205322, "learning_rate": 2.2204999999999997e-06, "loss": 0.1535, "step": 4060 }, { "epoch": 1.1285882352941177, "grad_norm": 3.995572328567505, "learning_rate": 2.2179999999999998e-06, "loss": 0.1744, "step": 4065 }, { "epoch": 1.1291764705882352, "grad_norm": 4.287552356719971, "learning_rate": 2.2155e-06, "loss": 0.1675, "step": 4070 }, { "epoch": 1.129764705882353, "grad_norm": 4.501704216003418, "learning_rate": 2.213e-06, "loss": 0.1976, "step": 4075 }, { "epoch": 1.1303529411764706, "grad_norm": 4.956085681915283, "learning_rate": 2.2105e-06, "loss": 0.1536, "step": 4080 }, { "epoch": 1.1309411764705883, "grad_norm": 3.691065788269043, "learning_rate": 2.2080000000000003e-06, "loss": 0.1574, "step": 4085 }, { "epoch": 1.131529411764706, "grad_norm": 4.585495471954346, "learning_rate": 2.2055e-06, "loss": 0.1441, "step": 4090 }, { "epoch": 1.1321176470588235, "grad_norm": 4.583806991577148, "learning_rate": 2.203e-06, "loss": 0.1872, "step": 4095 }, { "epoch": 1.1327058823529412, "grad_norm": 3.265049695968628, "learning_rate": 2.2005e-06, "loss": 0.148, "step": 4100 }, { "epoch": 1.1332941176470588, "grad_norm": 3.723870038986206, "learning_rate": 2.198e-06, "loss": 0.1466, "step": 4105 }, { "epoch": 1.1338823529411766, "grad_norm": 4.475979804992676, "learning_rate": 2.1955e-06, "loss": 0.135, "step": 4110 }, { "epoch": 1.1344705882352941, "grad_norm": 3.887965440750122, "learning_rate": 2.193e-06, "loss": 0.1518, "step": 4115 }, { "epoch": 1.1350588235294117, "grad_norm": 4.384058475494385, "learning_rate": 2.1905e-06, "loss": 0.151, "step": 4120 }, { "epoch": 1.1356470588235295, "grad_norm": 4.647097587585449, "learning_rate": 2.188e-06, "loss": 0.1462, "step": 4125 }, { "epoch": 1.136235294117647, "grad_norm": 4.415635108947754, "learning_rate": 2.1855e-06, "loss": 0.1544, "step": 4130 }, { "epoch": 1.1368235294117648, "grad_norm": 3.290691375732422, "learning_rate": 2.1829999999999997e-06, "loss": 0.1548, "step": 4135 }, { "epoch": 1.1374117647058823, "grad_norm": 4.82108736038208, "learning_rate": 2.1804999999999997e-06, "loss": 0.1357, "step": 4140 }, { "epoch": 1.138, "grad_norm": 4.983867168426514, "learning_rate": 2.1779999999999997e-06, "loss": 0.1653, "step": 4145 }, { "epoch": 1.1385882352941177, "grad_norm": 3.826066493988037, "learning_rate": 2.1754999999999998e-06, "loss": 0.14, "step": 4150 }, { "epoch": 1.1391764705882352, "grad_norm": 4.195671558380127, "learning_rate": 2.1729999999999998e-06, "loss": 0.1455, "step": 4155 }, { "epoch": 1.139764705882353, "grad_norm": 3.079108953475952, "learning_rate": 2.1705e-06, "loss": 0.1297, "step": 4160 }, { "epoch": 1.1403529411764706, "grad_norm": 5.005169868469238, "learning_rate": 2.1680000000000002e-06, "loss": 0.1614, "step": 4165 }, { "epoch": 1.1409411764705881, "grad_norm": 5.035433292388916, "learning_rate": 2.1655000000000003e-06, "loss": 0.1336, "step": 4170 }, { "epoch": 1.141529411764706, "grad_norm": 5.040280342102051, "learning_rate": 2.163e-06, "loss": 0.1573, "step": 4175 }, { "epoch": 1.1421176470588235, "grad_norm": 4.219944953918457, "learning_rate": 2.1605e-06, "loss": 0.1446, "step": 4180 }, { "epoch": 1.1427058823529412, "grad_norm": 4.404350280761719, "learning_rate": 2.158e-06, "loss": 0.145, "step": 4185 }, { "epoch": 1.1432941176470588, "grad_norm": 4.801955699920654, "learning_rate": 2.1555e-06, "loss": 0.1393, "step": 4190 }, { "epoch": 1.1438823529411764, "grad_norm": 3.6028900146484375, "learning_rate": 2.153e-06, "loss": 0.1289, "step": 4195 }, { "epoch": 1.1444705882352941, "grad_norm": 4.395596027374268, "learning_rate": 2.1505e-06, "loss": 0.119, "step": 4200 }, { "epoch": 1.1450588235294117, "grad_norm": 4.804821968078613, "learning_rate": 2.148e-06, "loss": 0.1369, "step": 4205 }, { "epoch": 1.1456470588235295, "grad_norm": 3.457217216491699, "learning_rate": 2.1455e-06, "loss": 0.1383, "step": 4210 }, { "epoch": 1.146235294117647, "grad_norm": 4.854312896728516, "learning_rate": 2.1429999999999996e-06, "loss": 0.1255, "step": 4215 }, { "epoch": 1.1468235294117648, "grad_norm": 4.633482933044434, "learning_rate": 2.1404999999999997e-06, "loss": 0.1343, "step": 4220 }, { "epoch": 1.1474117647058824, "grad_norm": 6.185222625732422, "learning_rate": 2.1379999999999997e-06, "loss": 0.1723, "step": 4225 }, { "epoch": 1.148, "grad_norm": 6.302745342254639, "learning_rate": 2.1354999999999997e-06, "loss": 0.1797, "step": 4230 }, { "epoch": 1.1485882352941177, "grad_norm": 5.542764663696289, "learning_rate": 2.1329999999999997e-06, "loss": 0.1962, "step": 4235 }, { "epoch": 1.1491764705882352, "grad_norm": 6.249689102172852, "learning_rate": 2.1304999999999998e-06, "loss": 0.238, "step": 4240 }, { "epoch": 1.149764705882353, "grad_norm": 5.082059860229492, "learning_rate": 2.128e-06, "loss": 0.1966, "step": 4245 }, { "epoch": 1.1503529411764706, "grad_norm": 5.025601863861084, "learning_rate": 2.1255000000000002e-06, "loss": 0.226, "step": 4250 }, { "epoch": 1.1509411764705884, "grad_norm": 4.80434513092041, "learning_rate": 2.1230000000000003e-06, "loss": 0.1897, "step": 4255 }, { "epoch": 1.151529411764706, "grad_norm": 4.74323034286499, "learning_rate": 2.1205e-06, "loss": 0.2037, "step": 4260 }, { "epoch": 1.1521176470588235, "grad_norm": 5.358842372894287, "learning_rate": 2.118e-06, "loss": 0.1969, "step": 4265 }, { "epoch": 1.1527058823529412, "grad_norm": 5.005589485168457, "learning_rate": 2.1155e-06, "loss": 0.1947, "step": 4270 }, { "epoch": 1.1532941176470588, "grad_norm": 5.486140727996826, "learning_rate": 2.113e-06, "loss": 0.1893, "step": 4275 }, { "epoch": 1.1538823529411766, "grad_norm": 4.575599193572998, "learning_rate": 2.1105e-06, "loss": 0.1578, "step": 4280 }, { "epoch": 1.1544705882352941, "grad_norm": 4.063296794891357, "learning_rate": 2.108e-06, "loss": 0.1565, "step": 4285 }, { "epoch": 1.1550588235294117, "grad_norm": 3.992035388946533, "learning_rate": 2.1055e-06, "loss": 0.1646, "step": 4290 }, { "epoch": 1.1556470588235295, "grad_norm": 4.0920610427856445, "learning_rate": 2.103e-06, "loss": 0.1784, "step": 4295 }, { "epoch": 1.156235294117647, "grad_norm": 7.897546291351318, "learning_rate": 2.1004999999999996e-06, "loss": 0.4008, "step": 4300 }, { "epoch": 1.1568235294117648, "grad_norm": 5.738661289215088, "learning_rate": 2.0979999999999996e-06, "loss": 0.6018, "step": 4305 }, { "epoch": 1.1574117647058824, "grad_norm": 6.263293266296387, "learning_rate": 2.0954999999999997e-06, "loss": 0.4464, "step": 4310 }, { "epoch": 1.158, "grad_norm": 5.564026355743408, "learning_rate": 2.0929999999999997e-06, "loss": 0.3377, "step": 4315 }, { "epoch": 1.1585882352941177, "grad_norm": 3.16245698928833, "learning_rate": 2.0904999999999997e-06, "loss": 0.3037, "step": 4320 }, { "epoch": 1.1591764705882353, "grad_norm": 4.2163777351379395, "learning_rate": 2.088e-06, "loss": 0.377, "step": 4325 }, { "epoch": 1.159764705882353, "grad_norm": 3.7926177978515625, "learning_rate": 2.0855e-06, "loss": 0.1667, "step": 4330 }, { "epoch": 1.1603529411764706, "grad_norm": 3.056328296661377, "learning_rate": 2.0830000000000002e-06, "loss": 0.1361, "step": 4335 }, { "epoch": 1.1609411764705881, "grad_norm": 2.900149345397949, "learning_rate": 2.0805e-06, "loss": 0.144, "step": 4340 }, { "epoch": 1.161529411764706, "grad_norm": 3.768230676651001, "learning_rate": 2.078e-06, "loss": 0.1285, "step": 4345 }, { "epoch": 1.1621176470588235, "grad_norm": 3.5829954147338867, "learning_rate": 2.0755e-06, "loss": 0.1372, "step": 4350 }, { "epoch": 1.1627058823529413, "grad_norm": 2.915019989013672, "learning_rate": 2.073e-06, "loss": 0.1328, "step": 4355 }, { "epoch": 1.1632941176470588, "grad_norm": 3.1212756633758545, "learning_rate": 2.0705e-06, "loss": 0.0993, "step": 4360 }, { "epoch": 1.1638823529411764, "grad_norm": 3.3507394790649414, "learning_rate": 2.068e-06, "loss": 0.1298, "step": 4365 }, { "epoch": 1.1644705882352941, "grad_norm": 2.548102378845215, "learning_rate": 2.0655e-06, "loss": 0.1108, "step": 4370 }, { "epoch": 1.1650588235294117, "grad_norm": 3.1974732875823975, "learning_rate": 2.063e-06, "loss": 0.0992, "step": 4375 }, { "epoch": 1.1656470588235295, "grad_norm": 3.177469253540039, "learning_rate": 2.0605e-06, "loss": 0.1213, "step": 4380 }, { "epoch": 1.166235294117647, "grad_norm": 3.4445009231567383, "learning_rate": 2.0579999999999996e-06, "loss": 0.1681, "step": 4385 }, { "epoch": 1.1668235294117646, "grad_norm": 3.960343360900879, "learning_rate": 2.0554999999999996e-06, "loss": 0.293, "step": 4390 }, { "epoch": 1.1674117647058824, "grad_norm": 3.4589648246765137, "learning_rate": 2.0529999999999997e-06, "loss": 0.2647, "step": 4395 }, { "epoch": 1.168, "grad_norm": 3.958676815032959, "learning_rate": 2.0505e-06, "loss": 0.2209, "step": 4400 }, { "epoch": 1.1685882352941177, "grad_norm": 3.545652389526367, "learning_rate": 2.048e-06, "loss": 0.1887, "step": 4405 }, { "epoch": 1.1691764705882353, "grad_norm": 4.148390293121338, "learning_rate": 2.0455e-06, "loss": 0.1535, "step": 4410 }, { "epoch": 1.1697647058823528, "grad_norm": 4.84144926071167, "learning_rate": 2.043e-06, "loss": 0.2356, "step": 4415 }, { "epoch": 1.1703529411764706, "grad_norm": 3.351024866104126, "learning_rate": 2.0405e-06, "loss": 0.1553, "step": 4420 }, { "epoch": 1.1709411764705882, "grad_norm": 2.839557647705078, "learning_rate": 2.038e-06, "loss": 0.1356, "step": 4425 }, { "epoch": 1.171529411764706, "grad_norm": 4.592451572418213, "learning_rate": 2.0355e-06, "loss": 0.1188, "step": 4430 }, { "epoch": 1.1721176470588235, "grad_norm": 3.6243197917938232, "learning_rate": 2.033e-06, "loss": 0.1193, "step": 4435 }, { "epoch": 1.1727058823529413, "grad_norm": 3.123065233230591, "learning_rate": 2.0305e-06, "loss": 0.0958, "step": 4440 }, { "epoch": 1.1732941176470588, "grad_norm": 4.1203999519348145, "learning_rate": 2.028e-06, "loss": 0.0956, "step": 4445 }, { "epoch": 1.1738823529411764, "grad_norm": 3.5064687728881836, "learning_rate": 2.0255e-06, "loss": 0.0908, "step": 4450 }, { "epoch": 1.1744705882352942, "grad_norm": 4.005610942840576, "learning_rate": 2.023e-06, "loss": 0.078, "step": 4455 }, { "epoch": 1.1750588235294117, "grad_norm": 3.5341951847076416, "learning_rate": 2.0205e-06, "loss": 0.0638, "step": 4460 }, { "epoch": 1.1756470588235295, "grad_norm": 2.8387720584869385, "learning_rate": 2.0179999999999996e-06, "loss": 0.0686, "step": 4465 }, { "epoch": 1.176235294117647, "grad_norm": 3.9467122554779053, "learning_rate": 2.0154999999999996e-06, "loss": 0.0723, "step": 4470 }, { "epoch": 1.1768235294117648, "grad_norm": 2.7124178409576416, "learning_rate": 2.0129999999999996e-06, "loss": 0.0522, "step": 4475 }, { "epoch": 1.1774117647058824, "grad_norm": 2.5675783157348633, "learning_rate": 2.0105e-06, "loss": 0.0798, "step": 4480 }, { "epoch": 1.178, "grad_norm": 2.732994556427002, "learning_rate": 2.008e-06, "loss": 0.1158, "step": 4485 }, { "epoch": 1.1785882352941177, "grad_norm": 2.965308904647827, "learning_rate": 2.0055e-06, "loss": 0.1049, "step": 4490 }, { "epoch": 1.1791764705882353, "grad_norm": 4.476377964019775, "learning_rate": 2.003e-06, "loss": 0.1535, "step": 4495 }, { "epoch": 1.179764705882353, "grad_norm": 5.202823638916016, "learning_rate": 2.0005e-06, "loss": 0.1387, "step": 4500 }, { "epoch": 1.179764705882353, "eval_loss": 0.14640001952648163, "eval_runtime": 217.1717, "eval_samples_per_second": 8.118, "eval_steps_per_second": 2.031, "eval_wer": 9.232980061074187, "step": 4500 }, { "epoch": 1.1803529411764706, "grad_norm": 6.2610087394714355, "learning_rate": 1.9979999999999998e-06, "loss": 0.247, "step": 4505 }, { "epoch": 1.1809411764705882, "grad_norm": 4.51934814453125, "learning_rate": 1.9954999999999998e-06, "loss": 0.2083, "step": 4510 }, { "epoch": 1.181529411764706, "grad_norm": 3.095548391342163, "learning_rate": 1.993e-06, "loss": 0.1956, "step": 4515 }, { "epoch": 1.1821176470588235, "grad_norm": 3.439675807952881, "learning_rate": 1.9905e-06, "loss": 0.1816, "step": 4520 }, { "epoch": 1.1827058823529413, "grad_norm": 4.514340877532959, "learning_rate": 1.988e-06, "loss": 0.1739, "step": 4525 }, { "epoch": 1.1832941176470588, "grad_norm": 3.529175281524658, "learning_rate": 1.9855e-06, "loss": 0.2008, "step": 4530 }, { "epoch": 1.1838823529411764, "grad_norm": 3.220459461212158, "learning_rate": 1.983e-06, "loss": 0.1255, "step": 4535 }, { "epoch": 1.1844705882352942, "grad_norm": 2.7540953159332275, "learning_rate": 1.9805e-06, "loss": 0.192, "step": 4540 }, { "epoch": 1.1850588235294117, "grad_norm": 3.9074437618255615, "learning_rate": 1.978e-06, "loss": 0.1762, "step": 4545 }, { "epoch": 1.1856470588235295, "grad_norm": 3.209683895111084, "learning_rate": 1.9755e-06, "loss": 0.1535, "step": 4550 }, { "epoch": 1.186235294117647, "grad_norm": 3.7311837673187256, "learning_rate": 1.973e-06, "loss": 0.137, "step": 4555 }, { "epoch": 1.1868235294117646, "grad_norm": 5.1133904457092285, "learning_rate": 1.9705e-06, "loss": 0.1543, "step": 4560 }, { "epoch": 1.1874117647058824, "grad_norm": 4.132019996643066, "learning_rate": 1.968e-06, "loss": 0.1707, "step": 4565 }, { "epoch": 1.188, "grad_norm": 4.4807209968566895, "learning_rate": 1.9655e-06, "loss": 0.1525, "step": 4570 }, { "epoch": 1.1885882352941177, "grad_norm": 4.465579986572266, "learning_rate": 1.963e-06, "loss": 0.1364, "step": 4575 }, { "epoch": 1.1891764705882353, "grad_norm": 5.169947624206543, "learning_rate": 1.9604999999999997e-06, "loss": 0.1493, "step": 4580 }, { "epoch": 1.1897647058823528, "grad_norm": 4.79520320892334, "learning_rate": 1.9579999999999997e-06, "loss": 0.1294, "step": 4585 }, { "epoch": 1.1903529411764706, "grad_norm": 3.879488229751587, "learning_rate": 1.9554999999999997e-06, "loss": 0.1567, "step": 4590 }, { "epoch": 1.1909411764705882, "grad_norm": 4.155298233032227, "learning_rate": 1.953e-06, "loss": 0.151, "step": 4595 }, { "epoch": 1.191529411764706, "grad_norm": 6.497487545013428, "learning_rate": 1.9505e-06, "loss": 0.1388, "step": 4600 }, { "epoch": 1.1921176470588235, "grad_norm": 4.886396408081055, "learning_rate": 1.948e-06, "loss": 0.1795, "step": 4605 }, { "epoch": 1.192705882352941, "grad_norm": 4.418575286865234, "learning_rate": 1.9455e-06, "loss": 0.2097, "step": 4610 }, { "epoch": 1.1932941176470588, "grad_norm": 5.331243515014648, "learning_rate": 1.943e-06, "loss": 0.2758, "step": 4615 }, { "epoch": 1.1938823529411764, "grad_norm": 4.084447383880615, "learning_rate": 1.9405e-06, "loss": 0.318, "step": 4620 }, { "epoch": 1.1944705882352942, "grad_norm": 3.595201015472412, "learning_rate": 1.938e-06, "loss": 0.3068, "step": 4625 }, { "epoch": 1.1950588235294117, "grad_norm": 4.595006942749023, "learning_rate": 1.9355e-06, "loss": 0.3661, "step": 4630 }, { "epoch": 1.1956470588235295, "grad_norm": 3.829101085662842, "learning_rate": 1.933e-06, "loss": 0.3131, "step": 4635 }, { "epoch": 1.196235294117647, "grad_norm": 4.259395122528076, "learning_rate": 1.9305e-06, "loss": 0.2979, "step": 4640 }, { "epoch": 1.1968235294117646, "grad_norm": 3.6669700145721436, "learning_rate": 1.928e-06, "loss": 0.2769, "step": 4645 }, { "epoch": 1.1974117647058824, "grad_norm": 3.959017276763916, "learning_rate": 1.9255e-06, "loss": 0.3253, "step": 4650 }, { "epoch": 1.198, "grad_norm": 3.6177120208740234, "learning_rate": 1.923e-06, "loss": 0.2853, "step": 4655 }, { "epoch": 1.1985882352941177, "grad_norm": 4.340188980102539, "learning_rate": 1.9205e-06, "loss": 0.292, "step": 4660 }, { "epoch": 1.1991764705882353, "grad_norm": 4.246086597442627, "learning_rate": 1.9179999999999997e-06, "loss": 0.282, "step": 4665 }, { "epoch": 1.1997647058823528, "grad_norm": 4.474183559417725, "learning_rate": 1.9154999999999997e-06, "loss": 0.2358, "step": 4670 }, { "epoch": 1.2003529411764706, "grad_norm": 3.5266544818878174, "learning_rate": 1.913e-06, "loss": 0.221, "step": 4675 }, { "epoch": 1.2009411764705882, "grad_norm": 3.739431142807007, "learning_rate": 1.9105e-06, "loss": 0.2463, "step": 4680 }, { "epoch": 1.201529411764706, "grad_norm": 3.6631460189819336, "learning_rate": 1.9079999999999998e-06, "loss": 0.1915, "step": 4685 }, { "epoch": 1.2021176470588235, "grad_norm": 3.8748130798339844, "learning_rate": 1.9054999999999998e-06, "loss": 0.1724, "step": 4690 }, { "epoch": 1.2027058823529413, "grad_norm": 3.5952937602996826, "learning_rate": 1.903e-06, "loss": 0.1876, "step": 4695 }, { "epoch": 1.2032941176470588, "grad_norm": 3.926798105239868, "learning_rate": 1.9005e-06, "loss": 0.2175, "step": 4700 }, { "epoch": 1.2038823529411764, "grad_norm": 3.9547531604766846, "learning_rate": 1.8979999999999999e-06, "loss": 0.1875, "step": 4705 }, { "epoch": 1.2044705882352942, "grad_norm": 3.8086087703704834, "learning_rate": 1.8954999999999999e-06, "loss": 0.2119, "step": 4710 }, { "epoch": 1.2050588235294117, "grad_norm": 3.9097371101379395, "learning_rate": 1.893e-06, "loss": 0.1877, "step": 4715 }, { "epoch": 1.2056470588235295, "grad_norm": 3.3460967540740967, "learning_rate": 1.8905e-06, "loss": 0.1896, "step": 4720 }, { "epoch": 1.206235294117647, "grad_norm": 2.549927234649658, "learning_rate": 1.8879999999999998e-06, "loss": 0.1443, "step": 4725 }, { "epoch": 1.2068235294117646, "grad_norm": 3.6211559772491455, "learning_rate": 1.8854999999999998e-06, "loss": 0.1585, "step": 4730 }, { "epoch": 1.2074117647058824, "grad_norm": 2.835987091064453, "learning_rate": 1.883e-06, "loss": 0.1483, "step": 4735 }, { "epoch": 1.208, "grad_norm": 3.059001922607422, "learning_rate": 1.8805e-06, "loss": 0.141, "step": 4740 }, { "epoch": 1.2085882352941177, "grad_norm": 3.879406690597534, "learning_rate": 1.8779999999999998e-06, "loss": 0.1682, "step": 4745 }, { "epoch": 1.2091764705882353, "grad_norm": 2.9307780265808105, "learning_rate": 1.8754999999999999e-06, "loss": 0.1708, "step": 4750 }, { "epoch": 1.2097647058823529, "grad_norm": 2.7809829711914062, "learning_rate": 1.873e-06, "loss": 0.1259, "step": 4755 }, { "epoch": 1.2103529411764706, "grad_norm": 2.6580557823181152, "learning_rate": 1.8705e-06, "loss": 0.1831, "step": 4760 }, { "epoch": 1.2109411764705882, "grad_norm": 3.394289016723633, "learning_rate": 1.868e-06, "loss": 0.1563, "step": 4765 }, { "epoch": 1.211529411764706, "grad_norm": 3.552314519882202, "learning_rate": 1.8654999999999998e-06, "loss": 0.143, "step": 4770 }, { "epoch": 1.2121176470588235, "grad_norm": 3.360884428024292, "learning_rate": 1.863e-06, "loss": 0.1388, "step": 4775 }, { "epoch": 1.212705882352941, "grad_norm": 3.025977611541748, "learning_rate": 1.8605e-06, "loss": 0.1734, "step": 4780 }, { "epoch": 1.2132941176470589, "grad_norm": 3.5540735721588135, "learning_rate": 1.858e-06, "loss": 0.1684, "step": 4785 }, { "epoch": 1.2138823529411764, "grad_norm": 3.5086874961853027, "learning_rate": 1.8554999999999999e-06, "loss": 0.1495, "step": 4790 }, { "epoch": 1.2144705882352942, "grad_norm": 5.4808220863342285, "learning_rate": 1.8529999999999999e-06, "loss": 0.1547, "step": 4795 }, { "epoch": 1.2150588235294117, "grad_norm": 4.124263286590576, "learning_rate": 1.8505e-06, "loss": 0.1383, "step": 4800 }, { "epoch": 1.2156470588235293, "grad_norm": 4.038989067077637, "learning_rate": 1.848e-06, "loss": 0.139, "step": 4805 }, { "epoch": 1.216235294117647, "grad_norm": 4.514219760894775, "learning_rate": 1.8454999999999997e-06, "loss": 0.1513, "step": 4810 }, { "epoch": 1.2168235294117646, "grad_norm": 4.679487228393555, "learning_rate": 1.843e-06, "loss": 0.1294, "step": 4815 }, { "epoch": 1.2174117647058824, "grad_norm": 4.638938903808594, "learning_rate": 1.8405e-06, "loss": 0.1491, "step": 4820 }, { "epoch": 1.218, "grad_norm": 4.146193981170654, "learning_rate": 1.838e-06, "loss": 0.1531, "step": 4825 }, { "epoch": 1.2185882352941175, "grad_norm": 5.117063045501709, "learning_rate": 1.8354999999999998e-06, "loss": 0.1545, "step": 4830 }, { "epoch": 1.2191764705882353, "grad_norm": 4.408163070678711, "learning_rate": 1.8329999999999999e-06, "loss": 0.1676, "step": 4835 }, { "epoch": 1.2197647058823529, "grad_norm": 3.9175965785980225, "learning_rate": 1.8304999999999999e-06, "loss": 0.1397, "step": 4840 }, { "epoch": 1.2203529411764706, "grad_norm": 4.868222713470459, "learning_rate": 1.828e-06, "loss": 0.1628, "step": 4845 }, { "epoch": 1.2209411764705882, "grad_norm": 4.249133586883545, "learning_rate": 1.8254999999999997e-06, "loss": 0.1272, "step": 4850 }, { "epoch": 1.221529411764706, "grad_norm": 3.6746976375579834, "learning_rate": 1.823e-06, "loss": 0.1452, "step": 4855 }, { "epoch": 1.2221176470588235, "grad_norm": 3.9122142791748047, "learning_rate": 1.8205e-06, "loss": 0.1487, "step": 4860 }, { "epoch": 1.222705882352941, "grad_norm": 4.649582386016846, "learning_rate": 1.818e-06, "loss": 0.1717, "step": 4865 }, { "epoch": 1.2232941176470589, "grad_norm": 4.539823532104492, "learning_rate": 1.8154999999999998e-06, "loss": 0.1606, "step": 4870 }, { "epoch": 1.2238823529411764, "grad_norm": 3.992648124694824, "learning_rate": 1.8129999999999998e-06, "loss": 0.1697, "step": 4875 }, { "epoch": 1.2244705882352942, "grad_norm": 5.067594051361084, "learning_rate": 1.8104999999999999e-06, "loss": 0.1839, "step": 4880 }, { "epoch": 1.2250588235294118, "grad_norm": 4.412503242492676, "learning_rate": 1.8079999999999999e-06, "loss": 0.1567, "step": 4885 }, { "epoch": 1.2256470588235295, "grad_norm": 4.843005180358887, "learning_rate": 1.8055000000000001e-06, "loss": 0.1819, "step": 4890 }, { "epoch": 1.226235294117647, "grad_norm": 4.100526332855225, "learning_rate": 1.803e-06, "loss": 0.1598, "step": 4895 }, { "epoch": 1.2268235294117646, "grad_norm": 5.395322799682617, "learning_rate": 1.8005e-06, "loss": 0.2032, "step": 4900 }, { "epoch": 1.2274117647058824, "grad_norm": 5.583508491516113, "learning_rate": 1.798e-06, "loss": 0.1832, "step": 4905 }, { "epoch": 1.228, "grad_norm": 4.793237209320068, "learning_rate": 1.7955e-06, "loss": 0.1532, "step": 4910 }, { "epoch": 1.2285882352941178, "grad_norm": 4.354899883270264, "learning_rate": 1.7929999999999998e-06, "loss": 0.1855, "step": 4915 }, { "epoch": 1.2291764705882353, "grad_norm": 4.951694965362549, "learning_rate": 1.7904999999999998e-06, "loss": 0.1915, "step": 4920 }, { "epoch": 1.2297647058823529, "grad_norm": 5.430246829986572, "learning_rate": 1.7879999999999999e-06, "loss": 0.2616, "step": 4925 }, { "epoch": 1.2303529411764706, "grad_norm": 4.590375900268555, "learning_rate": 1.7855e-06, "loss": 0.2727, "step": 4930 }, { "epoch": 1.2309411764705882, "grad_norm": 4.662115097045898, "learning_rate": 1.783e-06, "loss": 0.2769, "step": 4935 }, { "epoch": 1.231529411764706, "grad_norm": 4.526543140411377, "learning_rate": 1.7805e-06, "loss": 0.3061, "step": 4940 }, { "epoch": 1.2321176470588235, "grad_norm": 4.448855400085449, "learning_rate": 1.778e-06, "loss": 0.2898, "step": 4945 }, { "epoch": 1.232705882352941, "grad_norm": 4.545901775360107, "learning_rate": 1.7755e-06, "loss": 0.3277, "step": 4950 }, { "epoch": 1.2332941176470589, "grad_norm": 3.7485060691833496, "learning_rate": 1.7729999999999998e-06, "loss": 0.3027, "step": 4955 }, { "epoch": 1.2338823529411764, "grad_norm": 4.160163402557373, "learning_rate": 1.7704999999999998e-06, "loss": 0.3395, "step": 4960 }, { "epoch": 1.2344705882352942, "grad_norm": 3.5571179389953613, "learning_rate": 1.7679999999999998e-06, "loss": 0.2948, "step": 4965 }, { "epoch": 1.2350588235294118, "grad_norm": 4.010355472564697, "learning_rate": 1.7655e-06, "loss": 0.3118, "step": 4970 }, { "epoch": 1.2356470588235293, "grad_norm": 4.388914108276367, "learning_rate": 1.7629999999999999e-06, "loss": 0.2594, "step": 4975 }, { "epoch": 1.236235294117647, "grad_norm": 4.002409934997559, "learning_rate": 1.7605e-06, "loss": 0.2738, "step": 4980 }, { "epoch": 1.2368235294117647, "grad_norm": 3.9200005531311035, "learning_rate": 1.758e-06, "loss": 0.3231, "step": 4985 }, { "epoch": 1.2374117647058824, "grad_norm": 3.951580286026001, "learning_rate": 1.7555e-06, "loss": 0.2038, "step": 4990 }, { "epoch": 1.238, "grad_norm": 4.1015753746032715, "learning_rate": 1.7529999999999998e-06, "loss": 0.2116, "step": 4995 }, { "epoch": 1.2385882352941175, "grad_norm": 4.897361755371094, "learning_rate": 1.7504999999999998e-06, "loss": 0.2622, "step": 5000 }, { "epoch": 1.2385882352941175, "eval_loss": 0.14533402025699615, "eval_runtime": 218.1554, "eval_samples_per_second": 8.081, "eval_steps_per_second": 2.021, "eval_wer": 9.098257589365906, "step": 5000 }, { "epoch": 1.2391764705882353, "grad_norm": 3.6949570178985596, "learning_rate": 1.7479999999999998e-06, "loss": 0.1969, "step": 5005 }, { "epoch": 1.2397647058823529, "grad_norm": 4.53063440322876, "learning_rate": 1.7455e-06, "loss": 0.1999, "step": 5010 }, { "epoch": 1.2403529411764707, "grad_norm": 5.626417636871338, "learning_rate": 1.743e-06, "loss": 0.1877, "step": 5015 }, { "epoch": 1.2409411764705882, "grad_norm": 4.335415363311768, "learning_rate": 1.7405e-06, "loss": 0.1522, "step": 5020 }, { "epoch": 1.2415294117647058, "grad_norm": 4.450643539428711, "learning_rate": 1.738e-06, "loss": 0.1676, "step": 5025 }, { "epoch": 1.2421176470588235, "grad_norm": 4.217557430267334, "learning_rate": 1.7355e-06, "loss": 0.1595, "step": 5030 }, { "epoch": 1.242705882352941, "grad_norm": 3.0353915691375732, "learning_rate": 1.733e-06, "loss": 0.1733, "step": 5035 }, { "epoch": 1.2432941176470589, "grad_norm": 4.106424808502197, "learning_rate": 1.7304999999999998e-06, "loss": 0.1395, "step": 5040 }, { "epoch": 1.2438823529411764, "grad_norm": 4.418708324432373, "learning_rate": 1.7279999999999998e-06, "loss": 0.1582, "step": 5045 }, { "epoch": 1.244470588235294, "grad_norm": 4.904573440551758, "learning_rate": 1.7255e-06, "loss": 0.1464, "step": 5050 }, { "epoch": 1.2450588235294118, "grad_norm": 3.8515071868896484, "learning_rate": 1.723e-06, "loss": 0.1621, "step": 5055 }, { "epoch": 1.2456470588235293, "grad_norm": 4.502257823944092, "learning_rate": 1.7204999999999999e-06, "loss": 0.1608, "step": 5060 }, { "epoch": 1.246235294117647, "grad_norm": 4.288053035736084, "learning_rate": 1.718e-06, "loss": 0.1394, "step": 5065 }, { "epoch": 1.2468235294117647, "grad_norm": 4.391435623168945, "learning_rate": 1.7155e-06, "loss": 0.1404, "step": 5070 }, { "epoch": 1.2474117647058824, "grad_norm": 4.700385570526123, "learning_rate": 1.713e-06, "loss": 0.1585, "step": 5075 }, { "epoch": 1.248, "grad_norm": 4.629156112670898, "learning_rate": 1.7104999999999998e-06, "loss": 0.1447, "step": 5080 }, { "epoch": 1.2485882352941176, "grad_norm": 6.200710773468018, "learning_rate": 1.7079999999999998e-06, "loss": 0.145, "step": 5085 }, { "epoch": 1.2491764705882353, "grad_norm": 3.928133487701416, "learning_rate": 1.7055e-06, "loss": 0.1568, "step": 5090 }, { "epoch": 1.2497647058823529, "grad_norm": 3.873077154159546, "learning_rate": 1.703e-06, "loss": 0.1504, "step": 5095 }, { "epoch": 1.2503529411764707, "grad_norm": 3.7462360858917236, "learning_rate": 1.7004999999999999e-06, "loss": 0.1436, "step": 5100 }, { "epoch": 1.2509411764705882, "grad_norm": 4.185761451721191, "learning_rate": 1.6979999999999999e-06, "loss": 0.1255, "step": 5105 }, { "epoch": 1.251529411764706, "grad_norm": 5.082767963409424, "learning_rate": 1.6955e-06, "loss": 0.1193, "step": 5110 }, { "epoch": 1.2521176470588236, "grad_norm": 4.552000999450684, "learning_rate": 1.693e-06, "loss": 0.1349, "step": 5115 }, { "epoch": 1.2527058823529411, "grad_norm": 3.6258492469787598, "learning_rate": 1.6904999999999997e-06, "loss": 0.1424, "step": 5120 }, { "epoch": 1.253294117647059, "grad_norm": 4.4037346839904785, "learning_rate": 1.6879999999999998e-06, "loss": 0.1497, "step": 5125 }, { "epoch": 1.2538823529411764, "grad_norm": 5.2759575843811035, "learning_rate": 1.6855e-06, "loss": 0.1413, "step": 5130 }, { "epoch": 1.2544705882352942, "grad_norm": 4.168008804321289, "learning_rate": 1.683e-06, "loss": 0.1206, "step": 5135 }, { "epoch": 1.2550588235294118, "grad_norm": 3.5901026725769043, "learning_rate": 1.6805e-06, "loss": 0.1431, "step": 5140 }, { "epoch": 1.2556470588235293, "grad_norm": 4.429934501647949, "learning_rate": 1.6779999999999999e-06, "loss": 0.1607, "step": 5145 }, { "epoch": 1.2562352941176471, "grad_norm": 4.39696741104126, "learning_rate": 1.6754999999999999e-06, "loss": 0.1493, "step": 5150 }, { "epoch": 1.2568235294117647, "grad_norm": 4.4410810470581055, "learning_rate": 1.673e-06, "loss": 0.1538, "step": 5155 }, { "epoch": 1.2574117647058825, "grad_norm": 4.128857612609863, "learning_rate": 1.6705e-06, "loss": 0.1455, "step": 5160 }, { "epoch": 1.258, "grad_norm": 4.204104900360107, "learning_rate": 1.668e-06, "loss": 0.1355, "step": 5165 }, { "epoch": 1.2585882352941176, "grad_norm": 3.7360575199127197, "learning_rate": 1.6655e-06, "loss": 0.1627, "step": 5170 }, { "epoch": 1.2591764705882353, "grad_norm": 4.093734264373779, "learning_rate": 1.663e-06, "loss": 0.1627, "step": 5175 }, { "epoch": 1.259764705882353, "grad_norm": 4.456553936004639, "learning_rate": 1.6605e-06, "loss": 0.245, "step": 5180 }, { "epoch": 1.2603529411764707, "grad_norm": 3.6106510162353516, "learning_rate": 1.6579999999999998e-06, "loss": 0.2343, "step": 5185 }, { "epoch": 1.2609411764705882, "grad_norm": 4.5803961753845215, "learning_rate": 1.6554999999999999e-06, "loss": 0.2755, "step": 5190 }, { "epoch": 1.2615294117647058, "grad_norm": 4.099979400634766, "learning_rate": 1.6529999999999999e-06, "loss": 0.2807, "step": 5195 }, { "epoch": 1.2621176470588236, "grad_norm": 3.987185001373291, "learning_rate": 1.6505e-06, "loss": 0.2971, "step": 5200 }, { "epoch": 1.2627058823529411, "grad_norm": 4.153828144073486, "learning_rate": 1.648e-06, "loss": 0.2871, "step": 5205 }, { "epoch": 1.263294117647059, "grad_norm": 3.5098612308502197, "learning_rate": 1.6455e-06, "loss": 0.2916, "step": 5210 }, { "epoch": 1.2638823529411765, "grad_norm": 5.631099224090576, "learning_rate": 1.643e-06, "loss": 0.3333, "step": 5215 }, { "epoch": 1.264470588235294, "grad_norm": 4.602212429046631, "learning_rate": 1.6405e-06, "loss": 0.2624, "step": 5220 }, { "epoch": 1.2650588235294118, "grad_norm": 3.299997329711914, "learning_rate": 1.6379999999999998e-06, "loss": 0.2613, "step": 5225 }, { "epoch": 1.2656470588235293, "grad_norm": 4.192801475524902, "learning_rate": 1.6354999999999998e-06, "loss": 0.3006, "step": 5230 }, { "epoch": 1.2662352941176471, "grad_norm": 4.211850643157959, "learning_rate": 1.6329999999999999e-06, "loss": 0.2822, "step": 5235 }, { "epoch": 1.2668235294117647, "grad_norm": 4.2276225090026855, "learning_rate": 1.6304999999999999e-06, "loss": 0.2932, "step": 5240 }, { "epoch": 1.2674117647058822, "grad_norm": 3.734010696411133, "learning_rate": 1.628e-06, "loss": 0.2454, "step": 5245 }, { "epoch": 1.268, "grad_norm": 3.6597421169281006, "learning_rate": 1.6255e-06, "loss": 0.2676, "step": 5250 }, { "epoch": 1.2685882352941176, "grad_norm": 3.4369189739227295, "learning_rate": 1.623e-06, "loss": 0.3146, "step": 5255 }, { "epoch": 1.2691764705882354, "grad_norm": 3.8520984649658203, "learning_rate": 1.6205e-06, "loss": 0.2772, "step": 5260 }, { "epoch": 1.269764705882353, "grad_norm": 4.008859634399414, "learning_rate": 1.618e-06, "loss": 0.2686, "step": 5265 }, { "epoch": 1.2703529411764705, "grad_norm": 4.360683917999268, "learning_rate": 1.6154999999999998e-06, "loss": 0.2986, "step": 5270 }, { "epoch": 1.2709411764705882, "grad_norm": 4.8565778732299805, "learning_rate": 1.6129999999999998e-06, "loss": 0.3161, "step": 5275 }, { "epoch": 1.271529411764706, "grad_norm": 3.6678411960601807, "learning_rate": 1.6104999999999999e-06, "loss": 0.2558, "step": 5280 }, { "epoch": 1.2721176470588236, "grad_norm": 4.223144054412842, "learning_rate": 1.608e-06, "loss": 0.329, "step": 5285 }, { "epoch": 1.2727058823529411, "grad_norm": 3.626837968826294, "learning_rate": 1.6055e-06, "loss": 0.259, "step": 5290 }, { "epoch": 1.2732941176470587, "grad_norm": 3.6403021812438965, "learning_rate": 1.603e-06, "loss": 0.3152, "step": 5295 }, { "epoch": 1.2738823529411765, "grad_norm": 3.72471284866333, "learning_rate": 1.6005e-06, "loss": 0.277, "step": 5300 }, { "epoch": 1.2744705882352942, "grad_norm": 4.602299213409424, "learning_rate": 1.598e-06, "loss": 0.2439, "step": 5305 }, { "epoch": 1.2750588235294118, "grad_norm": 3.8840904235839844, "learning_rate": 1.5954999999999998e-06, "loss": 0.1849, "step": 5310 }, { "epoch": 1.2756470588235294, "grad_norm": 4.675822734832764, "learning_rate": 1.5929999999999998e-06, "loss": 0.2344, "step": 5315 }, { "epoch": 1.2762352941176471, "grad_norm": 3.3800487518310547, "learning_rate": 1.5904999999999998e-06, "loss": 0.1226, "step": 5320 }, { "epoch": 1.2768235294117647, "grad_norm": 4.323106288909912, "learning_rate": 1.588e-06, "loss": 0.128, "step": 5325 }, { "epoch": 1.2774117647058825, "grad_norm": 3.7697246074676514, "learning_rate": 1.5855e-06, "loss": 0.1426, "step": 5330 }, { "epoch": 1.278, "grad_norm": 4.346513271331787, "learning_rate": 1.583e-06, "loss": 0.1266, "step": 5335 }, { "epoch": 1.2785882352941176, "grad_norm": 2.959198474884033, "learning_rate": 1.5805e-06, "loss": 0.1265, "step": 5340 }, { "epoch": 1.2791764705882354, "grad_norm": 3.369516611099243, "learning_rate": 1.578e-06, "loss": 0.1154, "step": 5345 }, { "epoch": 1.279764705882353, "grad_norm": 3.6160264015197754, "learning_rate": 1.5754999999999998e-06, "loss": 0.1084, "step": 5350 }, { "epoch": 1.2803529411764707, "grad_norm": 3.8888204097747803, "learning_rate": 1.5729999999999998e-06, "loss": 0.1394, "step": 5355 }, { "epoch": 1.2809411764705882, "grad_norm": 4.40725564956665, "learning_rate": 1.5705e-06, "loss": 0.1241, "step": 5360 }, { "epoch": 1.2815294117647058, "grad_norm": 3.9142019748687744, "learning_rate": 1.568e-06, "loss": 0.1214, "step": 5365 }, { "epoch": 1.2821176470588236, "grad_norm": 3.1366117000579834, "learning_rate": 1.5654999999999999e-06, "loss": 0.1363, "step": 5370 }, { "epoch": 1.2827058823529411, "grad_norm": 4.196997165679932, "learning_rate": 1.563e-06, "loss": 0.135, "step": 5375 }, { "epoch": 1.283294117647059, "grad_norm": 4.525932788848877, "learning_rate": 1.5605e-06, "loss": 0.1176, "step": 5380 }, { "epoch": 1.2838823529411765, "grad_norm": 3.4980721473693848, "learning_rate": 1.558e-06, "loss": 0.1266, "step": 5385 }, { "epoch": 1.284470588235294, "grad_norm": 4.069739818572998, "learning_rate": 1.5555e-06, "loss": 0.1528, "step": 5390 }, { "epoch": 1.2850588235294118, "grad_norm": 5.085138320922852, "learning_rate": 1.5529999999999998e-06, "loss": 0.1328, "step": 5395 }, { "epoch": 1.2856470588235294, "grad_norm": 4.206113815307617, "learning_rate": 1.5505e-06, "loss": 0.1475, "step": 5400 }, { "epoch": 1.2862352941176471, "grad_norm": 4.170754909515381, "learning_rate": 1.548e-06, "loss": 0.1509, "step": 5405 }, { "epoch": 1.2868235294117647, "grad_norm": 4.154270648956299, "learning_rate": 1.5455e-06, "loss": 0.1265, "step": 5410 }, { "epoch": 1.2874117647058823, "grad_norm": 4.568661212921143, "learning_rate": 1.5429999999999999e-06, "loss": 0.149, "step": 5415 }, { "epoch": 1.288, "grad_norm": 4.251669406890869, "learning_rate": 1.5405e-06, "loss": 0.1447, "step": 5420 }, { "epoch": 1.2885882352941176, "grad_norm": 4.7127580642700195, "learning_rate": 1.538e-06, "loss": 0.1711, "step": 5425 }, { "epoch": 1.2891764705882354, "grad_norm": 4.346589088439941, "learning_rate": 1.5355e-06, "loss": 0.1569, "step": 5430 }, { "epoch": 1.289764705882353, "grad_norm": 5.3657684326171875, "learning_rate": 1.5329999999999998e-06, "loss": 0.1634, "step": 5435 }, { "epoch": 1.2903529411764705, "grad_norm": 5.058385848999023, "learning_rate": 1.5305e-06, "loss": 0.1557, "step": 5440 }, { "epoch": 1.2909411764705883, "grad_norm": 4.116656303405762, "learning_rate": 1.528e-06, "loss": 0.1099, "step": 5445 }, { "epoch": 1.2915294117647058, "grad_norm": 5.149948596954346, "learning_rate": 1.5255e-06, "loss": 0.1634, "step": 5450 }, { "epoch": 1.2921176470588236, "grad_norm": 4.472373962402344, "learning_rate": 1.5229999999999999e-06, "loss": 0.1401, "step": 5455 }, { "epoch": 1.2927058823529411, "grad_norm": 3.9739813804626465, "learning_rate": 1.5204999999999999e-06, "loss": 0.1326, "step": 5460 }, { "epoch": 1.2932941176470587, "grad_norm": 3.560962438583374, "learning_rate": 1.518e-06, "loss": 0.1428, "step": 5465 }, { "epoch": 1.2938823529411765, "grad_norm": 3.8850462436676025, "learning_rate": 1.5155e-06, "loss": 0.127, "step": 5470 }, { "epoch": 1.294470588235294, "grad_norm": 3.78078031539917, "learning_rate": 1.5129999999999997e-06, "loss": 0.1324, "step": 5475 }, { "epoch": 1.2950588235294118, "grad_norm": 3.7669694423675537, "learning_rate": 1.5105e-06, "loss": 0.141, "step": 5480 }, { "epoch": 1.2956470588235294, "grad_norm": 3.8245644569396973, "learning_rate": 1.508e-06, "loss": 0.1276, "step": 5485 }, { "epoch": 1.296235294117647, "grad_norm": 3.5193772315979004, "learning_rate": 1.5055e-06, "loss": 0.133, "step": 5490 }, { "epoch": 1.2968235294117647, "grad_norm": 4.068570613861084, "learning_rate": 1.5029999999999998e-06, "loss": 0.1368, "step": 5495 }, { "epoch": 1.2974117647058825, "grad_norm": 3.9393699169158936, "learning_rate": 1.5004999999999999e-06, "loss": 0.1411, "step": 5500 }, { "epoch": 1.2974117647058825, "eval_loss": 0.14982947707176208, "eval_runtime": 215.2426, "eval_samples_per_second": 8.191, "eval_steps_per_second": 2.049, "eval_wer": 9.376684030896353, "step": 5500 }, { "epoch": 1.298, "grad_norm": 4.209099769592285, "learning_rate": 1.4979999999999999e-06, "loss": 0.1509, "step": 5505 }, { "epoch": 1.2985882352941176, "grad_norm": 4.658881187438965, "learning_rate": 1.4955e-06, "loss": 0.1544, "step": 5510 }, { "epoch": 1.2991764705882354, "grad_norm": 3.649226427078247, "learning_rate": 1.493e-06, "loss": 0.1417, "step": 5515 }, { "epoch": 1.299764705882353, "grad_norm": 6.161725997924805, "learning_rate": 1.4905e-06, "loss": 0.1423, "step": 5520 }, { "epoch": 1.3003529411764707, "grad_norm": 4.952603340148926, "learning_rate": 1.488e-06, "loss": 0.1527, "step": 5525 }, { "epoch": 1.3009411764705883, "grad_norm": 3.71806263923645, "learning_rate": 1.4855e-06, "loss": 0.1517, "step": 5530 }, { "epoch": 1.3015294117647058, "grad_norm": 4.352254390716553, "learning_rate": 1.483e-06, "loss": 0.1396, "step": 5535 }, { "epoch": 1.3021176470588236, "grad_norm": 3.780989408493042, "learning_rate": 1.4804999999999998e-06, "loss": 0.1473, "step": 5540 }, { "epoch": 1.3027058823529412, "grad_norm": 4.054777145385742, "learning_rate": 1.4779999999999999e-06, "loss": 0.1569, "step": 5545 }, { "epoch": 1.303294117647059, "grad_norm": 4.288354396820068, "learning_rate": 1.4754999999999999e-06, "loss": 0.1611, "step": 5550 }, { "epoch": 1.3038823529411765, "grad_norm": 4.643240928649902, "learning_rate": 1.473e-06, "loss": 0.1378, "step": 5555 }, { "epoch": 1.304470588235294, "grad_norm": 4.157869338989258, "learning_rate": 1.4705e-06, "loss": 0.1441, "step": 5560 }, { "epoch": 1.3050588235294118, "grad_norm": 4.7058281898498535, "learning_rate": 1.468e-06, "loss": 0.1583, "step": 5565 }, { "epoch": 1.3056470588235294, "grad_norm": 4.262722969055176, "learning_rate": 1.4655e-06, "loss": 0.1729, "step": 5570 }, { "epoch": 1.3062352941176472, "grad_norm": 3.914971113204956, "learning_rate": 1.463e-06, "loss": 0.151, "step": 5575 }, { "epoch": 1.3068235294117647, "grad_norm": 4.290412425994873, "learning_rate": 1.4604999999999998e-06, "loss": 0.1388, "step": 5580 }, { "epoch": 1.3074117647058823, "grad_norm": 3.6601831912994385, "learning_rate": 1.4579999999999998e-06, "loss": 0.1348, "step": 5585 }, { "epoch": 1.308, "grad_norm": 4.204775810241699, "learning_rate": 1.4554999999999999e-06, "loss": 0.1469, "step": 5590 }, { "epoch": 1.3085882352941176, "grad_norm": 3.479379177093506, "learning_rate": 1.4530000000000001e-06, "loss": 0.1303, "step": 5595 }, { "epoch": 1.3091764705882354, "grad_norm": 4.314781188964844, "learning_rate": 1.4505e-06, "loss": 0.1604, "step": 5600 }, { "epoch": 1.309764705882353, "grad_norm": 3.843196392059326, "learning_rate": 1.448e-06, "loss": 0.134, "step": 5605 }, { "epoch": 1.3103529411764705, "grad_norm": 3.735947608947754, "learning_rate": 1.4455e-06, "loss": 0.1312, "step": 5610 }, { "epoch": 1.3109411764705883, "grad_norm": 4.729022026062012, "learning_rate": 1.443e-06, "loss": 0.1644, "step": 5615 }, { "epoch": 1.3115294117647058, "grad_norm": 4.890264987945557, "learning_rate": 1.4404999999999998e-06, "loss": 0.2067, "step": 5620 }, { "epoch": 1.3121176470588236, "grad_norm": 4.902050971984863, "learning_rate": 1.4379999999999998e-06, "loss": 0.2568, "step": 5625 }, { "epoch": 1.3127058823529412, "grad_norm": 4.505701065063477, "learning_rate": 1.4354999999999999e-06, "loss": 0.2346, "step": 5630 }, { "epoch": 1.3132941176470587, "grad_norm": 3.9549448490142822, "learning_rate": 1.433e-06, "loss": 0.2954, "step": 5635 }, { "epoch": 1.3138823529411765, "grad_norm": 4.335541725158691, "learning_rate": 1.4305000000000001e-06, "loss": 0.2588, "step": 5640 }, { "epoch": 1.314470588235294, "grad_norm": 3.576043128967285, "learning_rate": 1.428e-06, "loss": 0.2565, "step": 5645 }, { "epoch": 1.3150588235294118, "grad_norm": 3.748084306716919, "learning_rate": 1.4255e-06, "loss": 0.3003, "step": 5650 }, { "epoch": 1.3156470588235294, "grad_norm": 3.494511604309082, "learning_rate": 1.423e-06, "loss": 0.2469, "step": 5655 }, { "epoch": 1.316235294117647, "grad_norm": 3.62790584564209, "learning_rate": 1.4205e-06, "loss": 0.2932, "step": 5660 }, { "epoch": 1.3168235294117647, "grad_norm": 3.6939284801483154, "learning_rate": 1.4179999999999998e-06, "loss": 0.2718, "step": 5665 }, { "epoch": 1.3174117647058823, "grad_norm": 4.1338582038879395, "learning_rate": 1.4154999999999998e-06, "loss": 0.2791, "step": 5670 }, { "epoch": 1.318, "grad_norm": 4.934860706329346, "learning_rate": 1.413e-06, "loss": 0.3268, "step": 5675 }, { "epoch": 1.3185882352941176, "grad_norm": 4.054742336273193, "learning_rate": 1.4105e-06, "loss": 0.3093, "step": 5680 }, { "epoch": 1.3191764705882352, "grad_norm": 3.9524009227752686, "learning_rate": 1.408e-06, "loss": 0.2495, "step": 5685 }, { "epoch": 1.319764705882353, "grad_norm": 3.7696330547332764, "learning_rate": 1.4055e-06, "loss": 0.2114, "step": 5690 }, { "epoch": 1.3203529411764705, "grad_norm": 3.6492321491241455, "learning_rate": 1.403e-06, "loss": 0.1819, "step": 5695 }, { "epoch": 1.3209411764705883, "grad_norm": 3.7122840881347656, "learning_rate": 1.4005e-06, "loss": 0.2423, "step": 5700 }, { "epoch": 1.3215294117647058, "grad_norm": 3.999228000640869, "learning_rate": 1.3979999999999998e-06, "loss": 0.1962, "step": 5705 }, { "epoch": 1.3221176470588234, "grad_norm": 4.307826519012451, "learning_rate": 1.3954999999999998e-06, "loss": 0.1673, "step": 5710 }, { "epoch": 1.3227058823529412, "grad_norm": 4.241715431213379, "learning_rate": 1.393e-06, "loss": 0.1428, "step": 5715 }, { "epoch": 1.323294117647059, "grad_norm": 7.8790411949157715, "learning_rate": 1.3905e-06, "loss": 0.2306, "step": 5720 }, { "epoch": 1.3238823529411765, "grad_norm": 3.9571187496185303, "learning_rate": 1.3879999999999999e-06, "loss": 0.1202, "step": 5725 }, { "epoch": 1.324470588235294, "grad_norm": 4.934453010559082, "learning_rate": 1.3855e-06, "loss": 0.1575, "step": 5730 }, { "epoch": 1.3250588235294118, "grad_norm": 5.463171482086182, "learning_rate": 1.383e-06, "loss": 0.1439, "step": 5735 }, { "epoch": 1.3256470588235294, "grad_norm": 4.415980339050293, "learning_rate": 1.3805e-06, "loss": 0.1445, "step": 5740 }, { "epoch": 1.3262352941176472, "grad_norm": 5.536924362182617, "learning_rate": 1.3779999999999998e-06, "loss": 0.1642, "step": 5745 }, { "epoch": 1.3268235294117647, "grad_norm": 4.790528297424316, "learning_rate": 1.3754999999999998e-06, "loss": 0.1462, "step": 5750 }, { "epoch": 1.3274117647058823, "grad_norm": 3.9238874912261963, "learning_rate": 1.373e-06, "loss": 0.1437, "step": 5755 }, { "epoch": 1.328, "grad_norm": 3.7432122230529785, "learning_rate": 1.3705e-06, "loss": 0.1412, "step": 5760 }, { "epoch": 1.3285882352941176, "grad_norm": 4.449925422668457, "learning_rate": 1.368e-06, "loss": 0.1337, "step": 5765 }, { "epoch": 1.3291764705882354, "grad_norm": 3.7513277530670166, "learning_rate": 1.3654999999999999e-06, "loss": 0.1463, "step": 5770 }, { "epoch": 1.329764705882353, "grad_norm": 4.60488224029541, "learning_rate": 1.363e-06, "loss": 0.1678, "step": 5775 }, { "epoch": 1.3303529411764705, "grad_norm": 4.11804723739624, "learning_rate": 1.3605e-06, "loss": 0.159, "step": 5780 }, { "epoch": 1.3309411764705883, "grad_norm": 5.360220432281494, "learning_rate": 1.358e-06, "loss": 0.167, "step": 5785 }, { "epoch": 1.3315294117647059, "grad_norm": 4.960263252258301, "learning_rate": 1.3554999999999998e-06, "loss": 0.1382, "step": 5790 }, { "epoch": 1.3321176470588236, "grad_norm": 5.07258415222168, "learning_rate": 1.353e-06, "loss": 0.1421, "step": 5795 }, { "epoch": 1.3327058823529412, "grad_norm": 4.397154808044434, "learning_rate": 1.3505e-06, "loss": 0.1444, "step": 5800 }, { "epoch": 1.3332941176470587, "grad_norm": 5.192692279815674, "learning_rate": 1.348e-06, "loss": 0.1476, "step": 5805 }, { "epoch": 1.3338823529411765, "grad_norm": 3.793745279312134, "learning_rate": 1.3454999999999999e-06, "loss": 0.1207, "step": 5810 }, { "epoch": 1.334470588235294, "grad_norm": 4.436985492706299, "learning_rate": 1.3429999999999999e-06, "loss": 0.1235, "step": 5815 }, { "epoch": 1.3350588235294119, "grad_norm": 5.145812034606934, "learning_rate": 1.3405e-06, "loss": 0.1223, "step": 5820 }, { "epoch": 1.3356470588235294, "grad_norm": 5.848226547241211, "learning_rate": 1.338e-06, "loss": 0.1014, "step": 5825 }, { "epoch": 1.336235294117647, "grad_norm": 4.811103343963623, "learning_rate": 1.3354999999999997e-06, "loss": 0.104, "step": 5830 }, { "epoch": 1.3368235294117647, "grad_norm": 5.367855548858643, "learning_rate": 1.333e-06, "loss": 0.0933, "step": 5835 }, { "epoch": 1.3374117647058823, "grad_norm": 4.622622966766357, "learning_rate": 1.3305e-06, "loss": 0.0681, "step": 5840 }, { "epoch": 1.338, "grad_norm": 2.9361040592193604, "learning_rate": 1.328e-06, "loss": 0.0679, "step": 5845 }, { "epoch": 1.3385882352941176, "grad_norm": 3.7007155418395996, "learning_rate": 1.3254999999999998e-06, "loss": 0.0577, "step": 5850 }, { "epoch": 1.3391764705882352, "grad_norm": 4.131628036499023, "learning_rate": 1.3229999999999999e-06, "loss": 0.0565, "step": 5855 }, { "epoch": 1.339764705882353, "grad_norm": 2.0773260593414307, "learning_rate": 1.3204999999999999e-06, "loss": 0.0547, "step": 5860 }, { "epoch": 1.3403529411764705, "grad_norm": 3.2589571475982666, "learning_rate": 1.318e-06, "loss": 0.0571, "step": 5865 }, { "epoch": 1.3409411764705883, "grad_norm": 4.2986321449279785, "learning_rate": 1.3154999999999997e-06, "loss": 0.0586, "step": 5870 }, { "epoch": 1.3415294117647059, "grad_norm": 4.546779155731201, "learning_rate": 1.313e-06, "loss": 0.0778, "step": 5875 }, { "epoch": 1.3421176470588234, "grad_norm": 3.8324365615844727, "learning_rate": 1.3105e-06, "loss": 0.1026, "step": 5880 }, { "epoch": 1.3427058823529412, "grad_norm": 4.707197189331055, "learning_rate": 1.308e-06, "loss": 0.1496, "step": 5885 }, { "epoch": 1.3432941176470587, "grad_norm": 3.8762009143829346, "learning_rate": 1.3055e-06, "loss": 0.1164, "step": 5890 }, { "epoch": 1.3438823529411765, "grad_norm": 4.834984302520752, "learning_rate": 1.3029999999999998e-06, "loss": 0.1519, "step": 5895 }, { "epoch": 1.344470588235294, "grad_norm": 4.856634616851807, "learning_rate": 1.3004999999999999e-06, "loss": 0.133, "step": 5900 }, { "epoch": 1.3450588235294116, "grad_norm": 3.2726635932922363, "learning_rate": 1.298e-06, "loss": 0.1634, "step": 5905 }, { "epoch": 1.3456470588235294, "grad_norm": 4.546060562133789, "learning_rate": 1.2955000000000001e-06, "loss": 0.1477, "step": 5910 }, { "epoch": 1.3462352941176472, "grad_norm": 4.841416835784912, "learning_rate": 1.293e-06, "loss": 0.1774, "step": 5915 }, { "epoch": 1.3468235294117648, "grad_norm": 4.733778476715088, "learning_rate": 1.2905e-06, "loss": 0.1588, "step": 5920 }, { "epoch": 1.3474117647058823, "grad_norm": 4.547962665557861, "learning_rate": 1.288e-06, "loss": 0.1447, "step": 5925 }, { "epoch": 1.3479999999999999, "grad_norm": 4.604930400848389, "learning_rate": 1.2855e-06, "loss": 0.1645, "step": 5930 }, { "epoch": 1.3485882352941176, "grad_norm": 4.995452404022217, "learning_rate": 1.2829999999999998e-06, "loss": 0.157, "step": 5935 }, { "epoch": 1.3491764705882354, "grad_norm": 4.300716876983643, "learning_rate": 1.2804999999999999e-06, "loss": 0.1664, "step": 5940 }, { "epoch": 2.0001176470588233, "grad_norm": 4.5490922927856445, "learning_rate": 1.2779999999999999e-06, "loss": 0.1486, "step": 5945 }, { "epoch": 2.0007058823529413, "grad_norm": 3.8646700382232666, "learning_rate": 1.2755000000000001e-06, "loss": 0.114, "step": 5950 }, { "epoch": 2.001294117647059, "grad_norm": 2.756864309310913, "learning_rate": 1.273e-06, "loss": 0.0986, "step": 5955 }, { "epoch": 2.0018823529411764, "grad_norm": 3.432535409927368, "learning_rate": 1.2705e-06, "loss": 0.0941, "step": 5960 }, { "epoch": 2.002470588235294, "grad_norm": 3.0825603008270264, "learning_rate": 1.268e-06, "loss": 0.0978, "step": 5965 }, { "epoch": 2.0030588235294116, "grad_norm": 3.597517251968384, "learning_rate": 1.2655e-06, "loss": 0.1101, "step": 5970 }, { "epoch": 2.0036470588235296, "grad_norm": 3.56708025932312, "learning_rate": 1.2629999999999998e-06, "loss": 0.1037, "step": 5975 }, { "epoch": 2.004235294117647, "grad_norm": 4.2819647789001465, "learning_rate": 1.2604999999999998e-06, "loss": 0.1104, "step": 5980 }, { "epoch": 2.0048235294117647, "grad_norm": 3.8464691638946533, "learning_rate": 1.2579999999999999e-06, "loss": 0.1015, "step": 5985 }, { "epoch": 2.0054117647058822, "grad_norm": 3.722219944000244, "learning_rate": 1.2555e-06, "loss": 0.1072, "step": 5990 }, { "epoch": 2.006, "grad_norm": 3.585920572280884, "learning_rate": 1.253e-06, "loss": 0.106, "step": 5995 }, { "epoch": 2.006588235294118, "grad_norm": 3.929980754852295, "learning_rate": 1.2505e-06, "loss": 0.1204, "step": 6000 }, { "epoch": 2.006588235294118, "eval_loss": 0.14453859627246857, "eval_runtime": 215.0469, "eval_samples_per_second": 8.198, "eval_steps_per_second": 2.051, "eval_wer": 8.81983114783546, "step": 6000 }, { "epoch": 2.0071764705882353, "grad_norm": 4.532388687133789, "learning_rate": 1.248e-06, "loss": 0.1102, "step": 6005 }, { "epoch": 2.007764705882353, "grad_norm": 3.4759392738342285, "learning_rate": 1.2455e-06, "loss": 0.1393, "step": 6010 }, { "epoch": 2.0083529411764705, "grad_norm": 4.172723293304443, "learning_rate": 1.243e-06, "loss": 0.1228, "step": 6015 }, { "epoch": 2.0089411764705885, "grad_norm": 2.889864921569824, "learning_rate": 1.2404999999999998e-06, "loss": 0.1129, "step": 6020 }, { "epoch": 2.009529411764706, "grad_norm": 4.672086715698242, "learning_rate": 1.2379999999999998e-06, "loss": 0.1392, "step": 6025 }, { "epoch": 2.0101176470588236, "grad_norm": 3.3857226371765137, "learning_rate": 1.2355e-06, "loss": 0.1065, "step": 6030 }, { "epoch": 2.010705882352941, "grad_norm": 3.2488632202148438, "learning_rate": 1.233e-06, "loss": 0.1189, "step": 6035 }, { "epoch": 2.0112941176470587, "grad_norm": 3.8868961334228516, "learning_rate": 1.2305e-06, "loss": 0.0998, "step": 6040 }, { "epoch": 2.0118823529411767, "grad_norm": 3.816267490386963, "learning_rate": 1.228e-06, "loss": 0.1045, "step": 6045 }, { "epoch": 2.0124705882352942, "grad_norm": 3.4660704135894775, "learning_rate": 1.2255e-06, "loss": 0.1253, "step": 6050 }, { "epoch": 2.013058823529412, "grad_norm": 3.8826613426208496, "learning_rate": 1.223e-06, "loss": 0.1139, "step": 6055 }, { "epoch": 2.0136470588235293, "grad_norm": 3.0647642612457275, "learning_rate": 1.2204999999999998e-06, "loss": 0.1061, "step": 6060 }, { "epoch": 2.014235294117647, "grad_norm": 3.7288174629211426, "learning_rate": 1.2179999999999998e-06, "loss": 0.1098, "step": 6065 }, { "epoch": 2.014823529411765, "grad_norm": 3.600821018218994, "learning_rate": 1.2155e-06, "loss": 0.1178, "step": 6070 }, { "epoch": 2.0154117647058825, "grad_norm": 3.9806370735168457, "learning_rate": 1.213e-06, "loss": 0.1202, "step": 6075 }, { "epoch": 2.016, "grad_norm": 5.6414408683776855, "learning_rate": 1.2104999999999999e-06, "loss": 0.1036, "step": 6080 }, { "epoch": 2.0165882352941176, "grad_norm": 4.030844211578369, "learning_rate": 1.208e-06, "loss": 0.1217, "step": 6085 }, { "epoch": 2.017176470588235, "grad_norm": 3.3928050994873047, "learning_rate": 1.2055e-06, "loss": 0.1265, "step": 6090 }, { "epoch": 2.017764705882353, "grad_norm": 3.8300514221191406, "learning_rate": 1.203e-06, "loss": 0.0987, "step": 6095 }, { "epoch": 2.0183529411764707, "grad_norm": 3.3006465435028076, "learning_rate": 1.2004999999999998e-06, "loss": 0.1068, "step": 6100 }, { "epoch": 2.0189411764705882, "grad_norm": 3.1402029991149902, "learning_rate": 1.1979999999999998e-06, "loss": 0.1159, "step": 6105 }, { "epoch": 2.019529411764706, "grad_norm": 3.641228199005127, "learning_rate": 1.1955e-06, "loss": 0.1047, "step": 6110 }, { "epoch": 2.0201176470588234, "grad_norm": 3.623260259628296, "learning_rate": 1.193e-06, "loss": 0.1211, "step": 6115 }, { "epoch": 2.0207058823529414, "grad_norm": 3.462038516998291, "learning_rate": 1.1904999999999999e-06, "loss": 0.1146, "step": 6120 }, { "epoch": 2.021294117647059, "grad_norm": 3.2017321586608887, "learning_rate": 1.1879999999999999e-06, "loss": 0.1142, "step": 6125 }, { "epoch": 2.0218823529411765, "grad_norm": 3.420053005218506, "learning_rate": 1.1855e-06, "loss": 0.1481, "step": 6130 }, { "epoch": 2.022470588235294, "grad_norm": 2.7333104610443115, "learning_rate": 1.183e-06, "loss": 0.1324, "step": 6135 }, { "epoch": 2.0230588235294116, "grad_norm": 3.8676540851593018, "learning_rate": 1.1805e-06, "loss": 0.1214, "step": 6140 }, { "epoch": 2.0236470588235296, "grad_norm": 3.517268419265747, "learning_rate": 1.178e-06, "loss": 0.1436, "step": 6145 }, { "epoch": 2.024235294117647, "grad_norm": 3.365556478500366, "learning_rate": 1.1755e-06, "loss": 0.1492, "step": 6150 }, { "epoch": 2.0248235294117647, "grad_norm": 3.608767032623291, "learning_rate": 1.173e-06, "loss": 0.1556, "step": 6155 }, { "epoch": 2.0254117647058822, "grad_norm": 4.036674976348877, "learning_rate": 1.1705e-06, "loss": 0.1636, "step": 6160 }, { "epoch": 2.026, "grad_norm": 3.033473014831543, "learning_rate": 1.1679999999999999e-06, "loss": 0.109, "step": 6165 }, { "epoch": 2.026588235294118, "grad_norm": 3.256957530975342, "learning_rate": 1.1655e-06, "loss": 0.1444, "step": 6170 }, { "epoch": 2.0271764705882354, "grad_norm": 3.2348921298980713, "learning_rate": 1.163e-06, "loss": 0.1602, "step": 6175 }, { "epoch": 2.027764705882353, "grad_norm": 3.1489288806915283, "learning_rate": 1.1605e-06, "loss": 0.1379, "step": 6180 }, { "epoch": 2.0283529411764705, "grad_norm": 3.440734624862671, "learning_rate": 1.158e-06, "loss": 0.1325, "step": 6185 }, { "epoch": 2.028941176470588, "grad_norm": 3.7099127769470215, "learning_rate": 1.1555e-06, "loss": 0.1525, "step": 6190 }, { "epoch": 2.029529411764706, "grad_norm": 3.6191725730895996, "learning_rate": 1.153e-06, "loss": 0.2177, "step": 6195 }, { "epoch": 2.0301176470588236, "grad_norm": 3.388805389404297, "learning_rate": 1.1505e-06, "loss": 0.2346, "step": 6200 }, { "epoch": 2.030705882352941, "grad_norm": 3.704744577407837, "learning_rate": 1.1479999999999999e-06, "loss": 0.2469, "step": 6205 }, { "epoch": 2.0312941176470587, "grad_norm": 4.3756208419799805, "learning_rate": 1.1454999999999999e-06, "loss": 0.2576, "step": 6210 }, { "epoch": 2.0318823529411763, "grad_norm": 3.235971689224243, "learning_rate": 1.143e-06, "loss": 0.2312, "step": 6215 }, { "epoch": 2.0324705882352943, "grad_norm": 3.424992799758911, "learning_rate": 1.1405e-06, "loss": 0.2102, "step": 6220 }, { "epoch": 2.033058823529412, "grad_norm": 3.4489128589630127, "learning_rate": 1.138e-06, "loss": 0.2383, "step": 6225 }, { "epoch": 2.0336470588235294, "grad_norm": 3.260072946548462, "learning_rate": 1.1355e-06, "loss": 0.2472, "step": 6230 }, { "epoch": 2.034235294117647, "grad_norm": 3.7301695346832275, "learning_rate": 1.133e-06, "loss": 0.264, "step": 6235 }, { "epoch": 2.034823529411765, "grad_norm": 3.305539846420288, "learning_rate": 1.1305e-06, "loss": 0.2514, "step": 6240 }, { "epoch": 2.0354117647058825, "grad_norm": 5.363631725311279, "learning_rate": 1.1279999999999998e-06, "loss": 0.317, "step": 6245 }, { "epoch": 2.036, "grad_norm": 2.9786622524261475, "learning_rate": 1.1254999999999999e-06, "loss": 0.2179, "step": 6250 }, { "epoch": 2.0365882352941176, "grad_norm": 3.7055511474609375, "learning_rate": 1.1229999999999999e-06, "loss": 0.2576, "step": 6255 }, { "epoch": 2.037176470588235, "grad_norm": 4.172769546508789, "learning_rate": 1.1205e-06, "loss": 0.2958, "step": 6260 }, { "epoch": 2.037764705882353, "grad_norm": 3.434136390686035, "learning_rate": 1.1180000000000001e-06, "loss": 0.2079, "step": 6265 }, { "epoch": 2.0383529411764707, "grad_norm": 3.089663505554199, "learning_rate": 1.1155e-06, "loss": 0.2044, "step": 6270 }, { "epoch": 2.0389411764705883, "grad_norm": 3.1077966690063477, "learning_rate": 1.113e-06, "loss": 0.2289, "step": 6275 }, { "epoch": 2.039529411764706, "grad_norm": 3.710737466812134, "learning_rate": 1.1105e-06, "loss": 0.2858, "step": 6280 }, { "epoch": 2.0401176470588234, "grad_norm": 3.1117355823516846, "learning_rate": 1.108e-06, "loss": 0.2747, "step": 6285 }, { "epoch": 2.0407058823529414, "grad_norm": 3.763277769088745, "learning_rate": 1.1054999999999998e-06, "loss": 0.2883, "step": 6290 }, { "epoch": 2.041294117647059, "grad_norm": 4.688112258911133, "learning_rate": 1.1029999999999999e-06, "loss": 0.2606, "step": 6295 }, { "epoch": 2.0418823529411765, "grad_norm": 3.533370018005371, "learning_rate": 1.1004999999999999e-06, "loss": 0.2419, "step": 6300 }, { "epoch": 2.042470588235294, "grad_norm": 3.3103272914886475, "learning_rate": 1.0980000000000001e-06, "loss": 0.2713, "step": 6305 }, { "epoch": 2.0430588235294116, "grad_norm": 3.523777961730957, "learning_rate": 1.0955e-06, "loss": 0.2239, "step": 6310 }, { "epoch": 2.0436470588235296, "grad_norm": 4.249201774597168, "learning_rate": 1.093e-06, "loss": 0.1974, "step": 6315 }, { "epoch": 2.044235294117647, "grad_norm": 4.11465311050415, "learning_rate": 1.0905e-06, "loss": 0.2044, "step": 6320 }, { "epoch": 2.0448235294117647, "grad_norm": 3.5423130989074707, "learning_rate": 1.088e-06, "loss": 0.162, "step": 6325 }, { "epoch": 2.0454117647058823, "grad_norm": 3.9773755073547363, "learning_rate": 1.0854999999999998e-06, "loss": 0.2065, "step": 6330 }, { "epoch": 2.046, "grad_norm": 3.983093023300171, "learning_rate": 1.0829999999999998e-06, "loss": 0.1433, "step": 6335 }, { "epoch": 2.046588235294118, "grad_norm": 4.718307971954346, "learning_rate": 1.0804999999999999e-06, "loss": 0.1638, "step": 6340 }, { "epoch": 2.0471764705882354, "grad_norm": 3.779331684112549, "learning_rate": 1.078e-06, "loss": 0.1399, "step": 6345 }, { "epoch": 2.047764705882353, "grad_norm": 3.832612991333008, "learning_rate": 1.0755e-06, "loss": 0.1186, "step": 6350 }, { "epoch": 2.0483529411764705, "grad_norm": 4.264135837554932, "learning_rate": 1.073e-06, "loss": 0.1352, "step": 6355 }, { "epoch": 2.048941176470588, "grad_norm": 3.9030492305755615, "learning_rate": 1.0705e-06, "loss": 0.1165, "step": 6360 }, { "epoch": 2.049529411764706, "grad_norm": 2.9888110160827637, "learning_rate": 1.068e-06, "loss": 0.1087, "step": 6365 }, { "epoch": 2.0501176470588236, "grad_norm": 4.803068161010742, "learning_rate": 1.0654999999999998e-06, "loss": 0.0951, "step": 6370 }, { "epoch": 2.050705882352941, "grad_norm": 4.80842399597168, "learning_rate": 1.0629999999999998e-06, "loss": 0.1083, "step": 6375 }, { "epoch": 2.0512941176470587, "grad_norm": 3.8328697681427, "learning_rate": 1.0605e-06, "loss": 0.1196, "step": 6380 }, { "epoch": 2.0518823529411763, "grad_norm": 3.9421403408050537, "learning_rate": 1.058e-06, "loss": 0.1379, "step": 6385 }, { "epoch": 2.0524705882352943, "grad_norm": 3.450986623764038, "learning_rate": 1.0555e-06, "loss": 0.1236, "step": 6390 }, { "epoch": 2.053058823529412, "grad_norm": 3.6049773693084717, "learning_rate": 1.053e-06, "loss": 0.0962, "step": 6395 }, { "epoch": 2.0536470588235294, "grad_norm": 3.73565673828125, "learning_rate": 1.0505e-06, "loss": 0.0941, "step": 6400 }, { "epoch": 2.054235294117647, "grad_norm": 3.547741413116455, "learning_rate": 1.048e-06, "loss": 0.1141, "step": 6405 }, { "epoch": 2.0548235294117645, "grad_norm": 4.58786678314209, "learning_rate": 1.0455e-06, "loss": 0.1143, "step": 6410 }, { "epoch": 2.0554117647058825, "grad_norm": 3.944561719894409, "learning_rate": 1.0429999999999998e-06, "loss": 0.1074, "step": 6415 }, { "epoch": 2.056, "grad_norm": 4.274311542510986, "learning_rate": 1.0405e-06, "loss": 0.1182, "step": 6420 }, { "epoch": 2.0565882352941176, "grad_norm": 3.816572427749634, "learning_rate": 1.038e-06, "loss": 0.1088, "step": 6425 }, { "epoch": 2.057176470588235, "grad_norm": 4.115599155426025, "learning_rate": 1.0355e-06, "loss": 0.1218, "step": 6430 }, { "epoch": 2.057764705882353, "grad_norm": 4.810798168182373, "learning_rate": 1.0329999999999999e-06, "loss": 0.1127, "step": 6435 }, { "epoch": 2.0583529411764707, "grad_norm": 3.578666925430298, "learning_rate": 1.0305e-06, "loss": 0.1021, "step": 6440 }, { "epoch": 2.0589411764705883, "grad_norm": 4.24717903137207, "learning_rate": 1.028e-06, "loss": 0.1163, "step": 6445 }, { "epoch": 2.059529411764706, "grad_norm": 3.5776548385620117, "learning_rate": 1.0255e-06, "loss": 0.1031, "step": 6450 }, { "epoch": 2.0601176470588234, "grad_norm": 4.303883075714111, "learning_rate": 1.0229999999999998e-06, "loss": 0.1213, "step": 6455 }, { "epoch": 2.0607058823529414, "grad_norm": 4.101013660430908, "learning_rate": 1.0205e-06, "loss": 0.1091, "step": 6460 }, { "epoch": 2.061294117647059, "grad_norm": 4.718240261077881, "learning_rate": 1.018e-06, "loss": 0.1187, "step": 6465 }, { "epoch": 2.0618823529411765, "grad_norm": 3.5659213066101074, "learning_rate": 1.0155e-06, "loss": 0.1191, "step": 6470 }, { "epoch": 2.062470588235294, "grad_norm": 4.361692428588867, "learning_rate": 1.0129999999999999e-06, "loss": 0.1253, "step": 6475 }, { "epoch": 2.0630588235294116, "grad_norm": 3.8293938636779785, "learning_rate": 1.0105e-06, "loss": 0.1096, "step": 6480 }, { "epoch": 2.0636470588235296, "grad_norm": 4.964282512664795, "learning_rate": 1.008e-06, "loss": 0.1388, "step": 6485 }, { "epoch": 2.064235294117647, "grad_norm": 4.066312313079834, "learning_rate": 1.0055e-06, "loss": 0.1242, "step": 6490 }, { "epoch": 2.0648235294117647, "grad_norm": 3.4401395320892334, "learning_rate": 1.0029999999999998e-06, "loss": 0.1073, "step": 6495 }, { "epoch": 2.0654117647058823, "grad_norm": 5.105144500732422, "learning_rate": 1.0005e-06, "loss": 0.1312, "step": 6500 }, { "epoch": 2.0654117647058823, "eval_loss": 0.14444409310817719, "eval_runtime": 216.7276, "eval_samples_per_second": 8.135, "eval_steps_per_second": 2.035, "eval_wer": 8.891683132746541, "step": 6500 }, { "epoch": 2.066, "grad_norm": 4.376410007476807, "learning_rate": 9.98e-07, "loss": 0.1113, "step": 6505 }, { "epoch": 2.066588235294118, "grad_norm": 4.354100227355957, "learning_rate": 9.955e-07, "loss": 0.1244, "step": 6510 }, { "epoch": 2.0671764705882354, "grad_norm": 4.840188503265381, "learning_rate": 9.929999999999999e-07, "loss": 0.1147, "step": 6515 }, { "epoch": 2.067764705882353, "grad_norm": 4.096179008483887, "learning_rate": 9.905e-07, "loss": 0.1117, "step": 6520 }, { "epoch": 2.0683529411764705, "grad_norm": 4.350401401519775, "learning_rate": 9.88e-07, "loss": 0.101, "step": 6525 }, { "epoch": 2.068941176470588, "grad_norm": 4.278405666351318, "learning_rate": 9.855e-07, "loss": 0.109, "step": 6530 }, { "epoch": 2.069529411764706, "grad_norm": 4.757381439208984, "learning_rate": 9.83e-07, "loss": 0.1041, "step": 6535 }, { "epoch": 2.0701176470588236, "grad_norm": 4.786244869232178, "learning_rate": 9.805e-07, "loss": 0.1369, "step": 6540 }, { "epoch": 2.070705882352941, "grad_norm": 5.087813854217529, "learning_rate": 9.78e-07, "loss": 0.1383, "step": 6545 }, { "epoch": 2.0712941176470587, "grad_norm": 4.794870853424072, "learning_rate": 9.755e-07, "loss": 0.1076, "step": 6550 }, { "epoch": 2.0718823529411763, "grad_norm": 4.405334949493408, "learning_rate": 9.729999999999998e-07, "loss": 0.117, "step": 6555 }, { "epoch": 2.0724705882352943, "grad_norm": 4.8106794357299805, "learning_rate": 9.705e-07, "loss": 0.1126, "step": 6560 }, { "epoch": 2.073058823529412, "grad_norm": 4.539356708526611, "learning_rate": 9.679999999999999e-07, "loss": 0.1223, "step": 6565 }, { "epoch": 2.0736470588235294, "grad_norm": 3.424386501312256, "learning_rate": 9.655e-07, "loss": 0.0909, "step": 6570 }, { "epoch": 2.074235294117647, "grad_norm": 3.402427911758423, "learning_rate": 9.63e-07, "loss": 0.0757, "step": 6575 }, { "epoch": 2.0748235294117645, "grad_norm": 2.65620493888855, "learning_rate": 9.605e-07, "loss": 0.0654, "step": 6580 }, { "epoch": 2.0754117647058825, "grad_norm": 3.615248441696167, "learning_rate": 9.58e-07, "loss": 0.0751, "step": 6585 }, { "epoch": 2.076, "grad_norm": 2.962472438812256, "learning_rate": 9.555e-07, "loss": 0.0704, "step": 6590 }, { "epoch": 2.0765882352941176, "grad_norm": 4.129675388336182, "learning_rate": 9.529999999999999e-07, "loss": 0.0762, "step": 6595 }, { "epoch": 2.077176470588235, "grad_norm": 3.8912012577056885, "learning_rate": 9.504999999999999e-07, "loss": 0.0675, "step": 6600 }, { "epoch": 2.0777647058823527, "grad_norm": 1.8364770412445068, "learning_rate": 9.479999999999999e-07, "loss": 0.0752, "step": 6605 }, { "epoch": 2.0783529411764707, "grad_norm": 3.7567691802978516, "learning_rate": 9.455e-07, "loss": 0.0695, "step": 6610 }, { "epoch": 2.0789411764705883, "grad_norm": 2.7998435497283936, "learning_rate": 9.429999999999999e-07, "loss": 0.0823, "step": 6615 }, { "epoch": 2.079529411764706, "grad_norm": 3.291718006134033, "learning_rate": 9.404999999999999e-07, "loss": 0.0726, "step": 6620 }, { "epoch": 2.0801176470588234, "grad_norm": 3.1633670330047607, "learning_rate": 9.379999999999998e-07, "loss": 0.0798, "step": 6625 }, { "epoch": 2.080705882352941, "grad_norm": 2.643526077270508, "learning_rate": 9.355e-07, "loss": 0.0839, "step": 6630 }, { "epoch": 2.081294117647059, "grad_norm": 4.571191310882568, "learning_rate": 9.33e-07, "loss": 0.0827, "step": 6635 }, { "epoch": 2.0818823529411765, "grad_norm": 3.765843152999878, "learning_rate": 9.304999999999999e-07, "loss": 0.0621, "step": 6640 }, { "epoch": 2.082470588235294, "grad_norm": 3.009443998336792, "learning_rate": 9.28e-07, "loss": 0.0617, "step": 6645 }, { "epoch": 2.0830588235294116, "grad_norm": 2.8628644943237305, "learning_rate": 9.255e-07, "loss": 0.0632, "step": 6650 }, { "epoch": 2.083647058823529, "grad_norm": 3.840031147003174, "learning_rate": 9.23e-07, "loss": 0.0539, "step": 6655 }, { "epoch": 2.084235294117647, "grad_norm": 3.0769946575164795, "learning_rate": 9.204999999999999e-07, "loss": 0.0589, "step": 6660 }, { "epoch": 2.0848235294117647, "grad_norm": 2.5203821659088135, "learning_rate": 9.18e-07, "loss": 0.0437, "step": 6665 }, { "epoch": 2.0854117647058823, "grad_norm": 3.190497875213623, "learning_rate": 9.155e-07, "loss": 0.05, "step": 6670 }, { "epoch": 2.086, "grad_norm": 1.5927996635437012, "learning_rate": 9.13e-07, "loss": 0.03, "step": 6675 }, { "epoch": 2.086588235294118, "grad_norm": 2.5160109996795654, "learning_rate": 9.104999999999999e-07, "loss": 0.0351, "step": 6680 }, { "epoch": 2.0871764705882354, "grad_norm": 2.2040927410125732, "learning_rate": 9.08e-07, "loss": 0.0298, "step": 6685 }, { "epoch": 2.087764705882353, "grad_norm": 3.2412350177764893, "learning_rate": 9.055e-07, "loss": 0.0349, "step": 6690 }, { "epoch": 2.0883529411764705, "grad_norm": 3.2960755825042725, "learning_rate": 9.03e-07, "loss": 0.1213, "step": 6695 }, { "epoch": 2.088941176470588, "grad_norm": 3.835237979888916, "learning_rate": 9.004999999999999e-07, "loss": 0.1621, "step": 6700 }, { "epoch": 2.089529411764706, "grad_norm": 3.8612561225891113, "learning_rate": 8.98e-07, "loss": 0.2094, "step": 6705 }, { "epoch": 2.0901176470588236, "grad_norm": 4.534683704376221, "learning_rate": 8.954999999999999e-07, "loss": 0.2388, "step": 6710 }, { "epoch": 2.090705882352941, "grad_norm": 3.9867632389068604, "learning_rate": 8.93e-07, "loss": 0.2126, "step": 6715 }, { "epoch": 2.0912941176470587, "grad_norm": 4.662811756134033, "learning_rate": 8.904999999999999e-07, "loss": 0.2486, "step": 6720 }, { "epoch": 2.0918823529411763, "grad_norm": 4.110130786895752, "learning_rate": 8.88e-07, "loss": 0.2732, "step": 6725 }, { "epoch": 2.0924705882352943, "grad_norm": 3.996330499649048, "learning_rate": 8.854999999999999e-07, "loss": 0.2367, "step": 6730 }, { "epoch": 2.093058823529412, "grad_norm": 3.7603931427001953, "learning_rate": 8.83e-07, "loss": 0.2918, "step": 6735 }, { "epoch": 2.0936470588235294, "grad_norm": 4.353634834289551, "learning_rate": 8.804999999999999e-07, "loss": 0.2709, "step": 6740 }, { "epoch": 2.094235294117647, "grad_norm": 3.893925428390503, "learning_rate": 8.78e-07, "loss": 0.2258, "step": 6745 }, { "epoch": 2.0948235294117645, "grad_norm": 2.99426007270813, "learning_rate": 8.754999999999999e-07, "loss": 0.2097, "step": 6750 }, { "epoch": 2.0954117647058825, "grad_norm": 4.228275299072266, "learning_rate": 8.729999999999999e-07, "loss": 0.2324, "step": 6755 }, { "epoch": 2.096, "grad_norm": 3.552263021469116, "learning_rate": 8.705e-07, "loss": 0.1815, "step": 6760 }, { "epoch": 2.0965882352941176, "grad_norm": 3.580173969268799, "learning_rate": 8.68e-07, "loss": 0.2231, "step": 6765 }, { "epoch": 2.097176470588235, "grad_norm": 4.917387962341309, "learning_rate": 8.655e-07, "loss": 0.2355, "step": 6770 }, { "epoch": 2.0977647058823528, "grad_norm": 2.4592792987823486, "learning_rate": 8.629999999999999e-07, "loss": 0.1329, "step": 6775 }, { "epoch": 2.0983529411764708, "grad_norm": 2.7361538410186768, "learning_rate": 8.605e-07, "loss": 0.1399, "step": 6780 }, { "epoch": 2.0989411764705883, "grad_norm": 3.3431153297424316, "learning_rate": 8.58e-07, "loss": 0.147, "step": 6785 }, { "epoch": 2.099529411764706, "grad_norm": 3.4092767238616943, "learning_rate": 8.555e-07, "loss": 0.1109, "step": 6790 }, { "epoch": 2.1001176470588234, "grad_norm": 3.6129932403564453, "learning_rate": 8.529999999999999e-07, "loss": 0.1807, "step": 6795 }, { "epoch": 2.100705882352941, "grad_norm": 3.3218915462493896, "learning_rate": 8.504999999999999e-07, "loss": 0.1415, "step": 6800 }, { "epoch": 2.101294117647059, "grad_norm": 3.04013991355896, "learning_rate": 8.48e-07, "loss": 0.1425, "step": 6805 }, { "epoch": 2.1018823529411765, "grad_norm": 2.857480049133301, "learning_rate": 8.455e-07, "loss": 0.1184, "step": 6810 }, { "epoch": 2.102470588235294, "grad_norm": 2.5954349040985107, "learning_rate": 8.429999999999999e-07, "loss": 0.1193, "step": 6815 }, { "epoch": 2.1030588235294116, "grad_norm": 2.576098680496216, "learning_rate": 8.404999999999999e-07, "loss": 0.1098, "step": 6820 }, { "epoch": 2.103647058823529, "grad_norm": 3.5981452465057373, "learning_rate": 8.38e-07, "loss": 0.1341, "step": 6825 }, { "epoch": 2.104235294117647, "grad_norm": 4.554183006286621, "learning_rate": 8.355e-07, "loss": 0.1273, "step": 6830 }, { "epoch": 2.1048235294117648, "grad_norm": 4.2153544425964355, "learning_rate": 8.329999999999999e-07, "loss": 0.1316, "step": 6835 }, { "epoch": 2.1054117647058823, "grad_norm": 3.460341453552246, "learning_rate": 8.304999999999999e-07, "loss": 0.0985, "step": 6840 }, { "epoch": 2.106, "grad_norm": 3.8283193111419678, "learning_rate": 8.28e-07, "loss": 0.107, "step": 6845 }, { "epoch": 2.106588235294118, "grad_norm": 4.030230522155762, "learning_rate": 8.255e-07, "loss": 0.122, "step": 6850 }, { "epoch": 2.1071764705882354, "grad_norm": 3.7896857261657715, "learning_rate": 8.229999999999999e-07, "loss": 0.1142, "step": 6855 }, { "epoch": 2.107764705882353, "grad_norm": 4.195309162139893, "learning_rate": 8.205e-07, "loss": 0.1141, "step": 6860 }, { "epoch": 2.1083529411764705, "grad_norm": 4.070171356201172, "learning_rate": 8.179999999999999e-07, "loss": 0.1077, "step": 6865 }, { "epoch": 2.108941176470588, "grad_norm": 3.907820224761963, "learning_rate": 8.155e-07, "loss": 0.1227, "step": 6870 }, { "epoch": 2.109529411764706, "grad_norm": 3.8243227005004883, "learning_rate": 8.129999999999999e-07, "loss": 0.1064, "step": 6875 }, { "epoch": 2.1101176470588237, "grad_norm": 4.594846725463867, "learning_rate": 8.105e-07, "loss": 0.1257, "step": 6880 }, { "epoch": 2.110705882352941, "grad_norm": 4.080305099487305, "learning_rate": 8.08e-07, "loss": 0.1106, "step": 6885 }, { "epoch": 2.1112941176470588, "grad_norm": 3.4250431060791016, "learning_rate": 8.055e-07, "loss": 0.1234, "step": 6890 }, { "epoch": 2.1118823529411763, "grad_norm": 4.03692626953125, "learning_rate": 8.03e-07, "loss": 0.1068, "step": 6895 }, { "epoch": 2.1124705882352943, "grad_norm": 2.5986828804016113, "learning_rate": 8.005e-07, "loss": 0.0892, "step": 6900 }, { "epoch": 2.113058823529412, "grad_norm": 2.9794490337371826, "learning_rate": 7.98e-07, "loss": 0.1019, "step": 6905 }, { "epoch": 2.1136470588235294, "grad_norm": 3.1859021186828613, "learning_rate": 7.954999999999999e-07, "loss": 0.1097, "step": 6910 }, { "epoch": 2.114235294117647, "grad_norm": 3.8603951930999756, "learning_rate": 7.93e-07, "loss": 0.1037, "step": 6915 }, { "epoch": 2.1148235294117645, "grad_norm": 4.164231777191162, "learning_rate": 7.905e-07, "loss": 0.0997, "step": 6920 }, { "epoch": 2.1154117647058825, "grad_norm": 3.744278907775879, "learning_rate": 7.88e-07, "loss": 0.1012, "step": 6925 }, { "epoch": 2.116, "grad_norm": 3.5061264038085938, "learning_rate": 7.854999999999999e-07, "loss": 0.0943, "step": 6930 }, { "epoch": 2.1165882352941177, "grad_norm": 3.5475871562957764, "learning_rate": 7.83e-07, "loss": 0.0875, "step": 6935 }, { "epoch": 2.117176470588235, "grad_norm": 4.007967472076416, "learning_rate": 7.805e-07, "loss": 0.1034, "step": 6940 }, { "epoch": 2.1177647058823528, "grad_norm": 4.606170654296875, "learning_rate": 7.78e-07, "loss": 0.1312, "step": 6945 }, { "epoch": 2.1183529411764708, "grad_norm": 4.265864849090576, "learning_rate": 7.754999999999999e-07, "loss": 0.1099, "step": 6950 }, { "epoch": 2.1189411764705883, "grad_norm": 3.904383659362793, "learning_rate": 7.729999999999999e-07, "loss": 0.1132, "step": 6955 }, { "epoch": 2.119529411764706, "grad_norm": 4.232688903808594, "learning_rate": 7.705e-07, "loss": 0.119, "step": 6960 }, { "epoch": 2.1201176470588234, "grad_norm": 3.9303131103515625, "learning_rate": 7.68e-07, "loss": 0.1315, "step": 6965 }, { "epoch": 2.120705882352941, "grad_norm": 4.14497709274292, "learning_rate": 7.654999999999999e-07, "loss": 0.1243, "step": 6970 }, { "epoch": 2.121294117647059, "grad_norm": 3.447218179702759, "learning_rate": 7.629999999999999e-07, "loss": 0.1217, "step": 6975 }, { "epoch": 2.1218823529411766, "grad_norm": 4.5269575119018555, "learning_rate": 7.605e-07, "loss": 0.1006, "step": 6980 }, { "epoch": 2.122470588235294, "grad_norm": 5.081904888153076, "learning_rate": 7.58e-07, "loss": 0.1361, "step": 6985 }, { "epoch": 2.1230588235294117, "grad_norm": 5.222269058227539, "learning_rate": 7.554999999999999e-07, "loss": 0.1419, "step": 6990 }, { "epoch": 2.123647058823529, "grad_norm": 5.557333946228027, "learning_rate": 7.529999999999999e-07, "loss": 0.1315, "step": 6995 }, { "epoch": 2.124235294117647, "grad_norm": 4.206608295440674, "learning_rate": 7.505e-07, "loss": 0.1409, "step": 7000 }, { "epoch": 2.124235294117647, "eval_loss": 0.13788723945617676, "eval_runtime": 221.5371, "eval_samples_per_second": 7.958, "eval_steps_per_second": 1.991, "eval_wer": 8.828812645949343, "step": 7000 }, { "epoch": 2.1248235294117648, "grad_norm": 7.366815090179443, "learning_rate": 7.48e-07, "loss": 0.134, "step": 7005 }, { "epoch": 2.1254117647058823, "grad_norm": 4.966409683227539, "learning_rate": 7.455e-07, "loss": 0.1428, "step": 7010 }, { "epoch": 2.126, "grad_norm": 4.947187900543213, "learning_rate": 7.429999999999999e-07, "loss": 0.1198, "step": 7015 }, { "epoch": 2.1265882352941174, "grad_norm": 3.2959718704223633, "learning_rate": 7.405e-07, "loss": 0.076, "step": 7020 }, { "epoch": 2.1271764705882354, "grad_norm": 3.850759267807007, "learning_rate": 7.38e-07, "loss": 0.0684, "step": 7025 }, { "epoch": 2.127764705882353, "grad_norm": 2.4587864875793457, "learning_rate": 7.355e-07, "loss": 0.0533, "step": 7030 }, { "epoch": 2.1283529411764706, "grad_norm": 3.584787130355835, "learning_rate": 7.329999999999999e-07, "loss": 0.0489, "step": 7035 }, { "epoch": 2.128941176470588, "grad_norm": 2.0744030475616455, "learning_rate": 7.305e-07, "loss": 0.0464, "step": 7040 }, { "epoch": 2.1295294117647057, "grad_norm": 2.12754487991333, "learning_rate": 7.28e-07, "loss": 0.0349, "step": 7045 }, { "epoch": 2.1301176470588237, "grad_norm": 3.2488489151000977, "learning_rate": 7.255e-07, "loss": 0.0293, "step": 7050 }, { "epoch": 2.1307058823529412, "grad_norm": 2.068417549133301, "learning_rate": 7.229999999999999e-07, "loss": 0.0306, "step": 7055 }, { "epoch": 2.131294117647059, "grad_norm": 1.6034528017044067, "learning_rate": 7.205e-07, "loss": 0.0216, "step": 7060 }, { "epoch": 2.1318823529411763, "grad_norm": 1.4879192113876343, "learning_rate": 7.179999999999999e-07, "loss": 0.0183, "step": 7065 }, { "epoch": 2.132470588235294, "grad_norm": 1.7684077024459839, "learning_rate": 7.155e-07, "loss": 0.0238, "step": 7070 }, { "epoch": 2.133058823529412, "grad_norm": 3.7660133838653564, "learning_rate": 7.129999999999999e-07, "loss": 0.0519, "step": 7075 }, { "epoch": 2.1336470588235295, "grad_norm": 3.101461172103882, "learning_rate": 7.105e-07, "loss": 0.0652, "step": 7080 }, { "epoch": 2.134235294117647, "grad_norm": 3.3564295768737793, "learning_rate": 7.079999999999999e-07, "loss": 0.0692, "step": 7085 }, { "epoch": 2.1348235294117646, "grad_norm": 4.043341159820557, "learning_rate": 7.055e-07, "loss": 0.0986, "step": 7090 }, { "epoch": 2.135411764705882, "grad_norm": 4.174405097961426, "learning_rate": 7.029999999999999e-07, "loss": 0.1052, "step": 7095 }, { "epoch": 2.136, "grad_norm": 4.68574857711792, "learning_rate": 7.005e-07, "loss": 0.1087, "step": 7100 }, { "epoch": 2.1365882352941177, "grad_norm": 4.526766777038574, "learning_rate": 6.979999999999999e-07, "loss": 0.1114, "step": 7105 }, { "epoch": 2.1371764705882352, "grad_norm": 4.121020317077637, "learning_rate": 6.955e-07, "loss": 0.119, "step": 7110 }, { "epoch": 2.137764705882353, "grad_norm": 3.7789900302886963, "learning_rate": 6.929999999999999e-07, "loss": 0.12, "step": 7115 }, { "epoch": 2.138352941176471, "grad_norm": 3.300339460372925, "learning_rate": 6.905e-07, "loss": 0.1161, "step": 7120 }, { "epoch": 2.1389411764705883, "grad_norm": 3.693659782409668, "learning_rate": 6.879999999999999e-07, "loss": 0.1139, "step": 7125 }, { "epoch": 2.139529411764706, "grad_norm": 4.014922618865967, "learning_rate": 6.854999999999999e-07, "loss": 0.1057, "step": 7130 }, { "epoch": 2.1401176470588235, "grad_norm": 3.247948408126831, "learning_rate": 6.830000000000001e-07, "loss": 0.1199, "step": 7135 }, { "epoch": 2.140705882352941, "grad_norm": 3.206010580062866, "learning_rate": 6.805e-07, "loss": 0.1165, "step": 7140 }, { "epoch": 2.141294117647059, "grad_norm": 3.5866265296936035, "learning_rate": 6.78e-07, "loss": 0.1275, "step": 7145 }, { "epoch": 2.1418823529411766, "grad_norm": 3.2774014472961426, "learning_rate": 6.754999999999999e-07, "loss": 0.1077, "step": 7150 }, { "epoch": 2.142470588235294, "grad_norm": 2.882467031478882, "learning_rate": 6.730000000000001e-07, "loss": 0.1172, "step": 7155 }, { "epoch": 2.1430588235294117, "grad_norm": 4.516074180603027, "learning_rate": 6.705e-07, "loss": 0.1119, "step": 7160 }, { "epoch": 2.1436470588235292, "grad_norm": 2.8667845726013184, "learning_rate": 6.68e-07, "loss": 0.1044, "step": 7165 }, { "epoch": 2.1442352941176472, "grad_norm": 4.690877914428711, "learning_rate": 6.654999999999999e-07, "loss": 0.1152, "step": 7170 }, { "epoch": 2.144823529411765, "grad_norm": 3.591593027114868, "learning_rate": 6.63e-07, "loss": 0.1019, "step": 7175 }, { "epoch": 2.1454117647058824, "grad_norm": 3.767172336578369, "learning_rate": 6.605e-07, "loss": 0.1217, "step": 7180 }, { "epoch": 2.146, "grad_norm": 3.9251527786254883, "learning_rate": 6.58e-07, "loss": 0.1293, "step": 7185 }, { "epoch": 2.1465882352941175, "grad_norm": 4.326487064361572, "learning_rate": 6.554999999999999e-07, "loss": 0.1368, "step": 7190 }, { "epoch": 2.1471764705882355, "grad_norm": 3.75852108001709, "learning_rate": 6.53e-07, "loss": 0.1082, "step": 7195 }, { "epoch": 2.147764705882353, "grad_norm": 3.7618930339813232, "learning_rate": 6.505e-07, "loss": 0.1504, "step": 7200 }, { "epoch": 2.1483529411764706, "grad_norm": 4.214940071105957, "learning_rate": 6.48e-07, "loss": 0.1461, "step": 7205 }, { "epoch": 2.148941176470588, "grad_norm": 4.073136806488037, "learning_rate": 6.454999999999999e-07, "loss": 0.3197, "step": 7210 }, { "epoch": 2.1495294117647057, "grad_norm": 4.714518070220947, "learning_rate": 6.43e-07, "loss": 0.2304, "step": 7215 }, { "epoch": 2.1501176470588237, "grad_norm": 4.21167516708374, "learning_rate": 6.404999999999999e-07, "loss": 0.221, "step": 7220 }, { "epoch": 2.1507058823529412, "grad_norm": 4.289494037628174, "learning_rate": 6.38e-07, "loss": 0.2631, "step": 7225 }, { "epoch": 2.151294117647059, "grad_norm": 3.496133327484131, "learning_rate": 6.354999999999999e-07, "loss": 0.2219, "step": 7230 }, { "epoch": 2.1518823529411764, "grad_norm": 4.441636562347412, "learning_rate": 6.33e-07, "loss": 0.2632, "step": 7235 }, { "epoch": 2.152470588235294, "grad_norm": 4.289673328399658, "learning_rate": 6.304999999999999e-07, "loss": 0.2181, "step": 7240 }, { "epoch": 2.153058823529412, "grad_norm": 4.236616611480713, "learning_rate": 6.28e-07, "loss": 0.2313, "step": 7245 }, { "epoch": 2.1536470588235295, "grad_norm": 4.6190032958984375, "learning_rate": 6.254999999999999e-07, "loss": 0.2416, "step": 7250 }, { "epoch": 2.154235294117647, "grad_norm": 3.5950613021850586, "learning_rate": 6.23e-07, "loss": 0.2755, "step": 7255 }, { "epoch": 2.1548235294117646, "grad_norm": 4.494294166564941, "learning_rate": 6.205e-07, "loss": 0.3027, "step": 7260 }, { "epoch": 2.1554117647058826, "grad_norm": 3.704636335372925, "learning_rate": 6.18e-07, "loss": 0.2146, "step": 7265 }, { "epoch": 2.156, "grad_norm": 4.351371765136719, "learning_rate": 6.155e-07, "loss": 0.2063, "step": 7270 }, { "epoch": 2.1565882352941177, "grad_norm": 3.871393918991089, "learning_rate": 6.13e-07, "loss": 0.1535, "step": 7275 }, { "epoch": 2.1571764705882353, "grad_norm": 4.320761203765869, "learning_rate": 6.105e-07, "loss": 0.1767, "step": 7280 }, { "epoch": 2.157764705882353, "grad_norm": 4.707387447357178, "learning_rate": 6.079999999999999e-07, "loss": 0.1841, "step": 7285 }, { "epoch": 2.158352941176471, "grad_norm": 3.823394536972046, "learning_rate": 6.055e-07, "loss": 0.1236, "step": 7290 }, { "epoch": 2.1589411764705884, "grad_norm": 3.76283860206604, "learning_rate": 6.03e-07, "loss": 0.1435, "step": 7295 }, { "epoch": 2.159529411764706, "grad_norm": 3.5690855979919434, "learning_rate": 6.005e-07, "loss": 0.1208, "step": 7300 }, { "epoch": 2.1601176470588235, "grad_norm": 4.156225204467773, "learning_rate": 5.979999999999999e-07, "loss": 0.1053, "step": 7305 }, { "epoch": 2.160705882352941, "grad_norm": 3.152632713317871, "learning_rate": 5.955e-07, "loss": 0.1153, "step": 7310 }, { "epoch": 2.161294117647059, "grad_norm": 3.1042511463165283, "learning_rate": 5.93e-07, "loss": 0.0837, "step": 7315 }, { "epoch": 2.1618823529411766, "grad_norm": 4.395899772644043, "learning_rate": 5.905e-07, "loss": 0.1141, "step": 7320 }, { "epoch": 2.162470588235294, "grad_norm": 3.2605082988739014, "learning_rate": 5.879999999999999e-07, "loss": 0.0952, "step": 7325 }, { "epoch": 2.1630588235294117, "grad_norm": 3.772939443588257, "learning_rate": 5.854999999999999e-07, "loss": 0.1249, "step": 7330 }, { "epoch": 2.1636470588235293, "grad_norm": 6.173035621643066, "learning_rate": 5.83e-07, "loss": 0.218, "step": 7335 }, { "epoch": 2.1642352941176473, "grad_norm": 5.7276835441589355, "learning_rate": 5.805e-07, "loss": 0.458, "step": 7340 }, { "epoch": 2.164823529411765, "grad_norm": 5.8230671882629395, "learning_rate": 5.779999999999999e-07, "loss": 0.3718, "step": 7345 }, { "epoch": 2.1654117647058824, "grad_norm": 3.0928845405578613, "learning_rate": 5.755e-07, "loss": 0.3083, "step": 7350 }, { "epoch": 2.166, "grad_norm": 5.80549430847168, "learning_rate": 5.73e-07, "loss": 0.4234, "step": 7355 }, { "epoch": 2.1665882352941175, "grad_norm": 3.6460976600646973, "learning_rate": 5.705e-07, "loss": 0.2354, "step": 7360 }, { "epoch": 2.1671764705882355, "grad_norm": 3.5840930938720703, "learning_rate": 5.679999999999999e-07, "loss": 0.2409, "step": 7365 }, { "epoch": 2.167764705882353, "grad_norm": 2.924928665161133, "learning_rate": 5.655e-07, "loss": 0.0973, "step": 7370 }, { "epoch": 2.1683529411764706, "grad_norm": 3.2825427055358887, "learning_rate": 5.629999999999999e-07, "loss": 0.1049, "step": 7375 }, { "epoch": 2.168941176470588, "grad_norm": 4.008925914764404, "learning_rate": 5.605e-07, "loss": 0.1377, "step": 7380 }, { "epoch": 2.1695294117647057, "grad_norm": 3.197460412979126, "learning_rate": 5.58e-07, "loss": 0.1577, "step": 7385 }, { "epoch": 2.1701176470588237, "grad_norm": 3.1058945655822754, "learning_rate": 5.555e-07, "loss": 0.1157, "step": 7390 }, { "epoch": 2.1707058823529413, "grad_norm": 3.106741189956665, "learning_rate": 5.53e-07, "loss": 0.1199, "step": 7395 }, { "epoch": 2.171294117647059, "grad_norm": 3.477177619934082, "learning_rate": 5.505e-07, "loss": 0.1009, "step": 7400 }, { "epoch": 2.1718823529411764, "grad_norm": 3.451761484146118, "learning_rate": 5.48e-07, "loss": 0.1035, "step": 7405 }, { "epoch": 2.172470588235294, "grad_norm": 3.388928174972534, "learning_rate": 5.455e-07, "loss": 0.0904, "step": 7410 }, { "epoch": 2.173058823529412, "grad_norm": 3.382169485092163, "learning_rate": 5.43e-07, "loss": 0.131, "step": 7415 }, { "epoch": 2.1736470588235295, "grad_norm": 3.5255074501037598, "learning_rate": 5.405e-07, "loss": 0.0954, "step": 7420 }, { "epoch": 2.174235294117647, "grad_norm": 3.3573575019836426, "learning_rate": 5.38e-07, "loss": 0.108, "step": 7425 }, { "epoch": 2.1748235294117646, "grad_norm": 3.0878665447235107, "learning_rate": 5.355e-07, "loss": 0.108, "step": 7430 }, { "epoch": 2.175411764705882, "grad_norm": 3.495513677597046, "learning_rate": 5.33e-07, "loss": 0.1054, "step": 7435 }, { "epoch": 2.176, "grad_norm": 4.147096157073975, "learning_rate": 5.304999999999999e-07, "loss": 0.123, "step": 7440 }, { "epoch": 2.1765882352941177, "grad_norm": 3.1584293842315674, "learning_rate": 5.28e-07, "loss": 0.1097, "step": 7445 }, { "epoch": 2.1771764705882353, "grad_norm": 3.7239224910736084, "learning_rate": 5.255e-07, "loss": 0.1029, "step": 7450 }, { "epoch": 2.177764705882353, "grad_norm": 3.2867257595062256, "learning_rate": 5.23e-07, "loss": 0.0839, "step": 7455 }, { "epoch": 2.1783529411764704, "grad_norm": 2.6911566257476807, "learning_rate": 5.204999999999999e-07, "loss": 0.0967, "step": 7460 }, { "epoch": 2.1789411764705884, "grad_norm": 3.310439348220825, "learning_rate": 5.18e-07, "loss": 0.0856, "step": 7465 }, { "epoch": 2.179529411764706, "grad_norm": 2.936257839202881, "learning_rate": 5.155e-07, "loss": 0.092, "step": 7470 }, { "epoch": 2.1801176470588235, "grad_norm": 3.030754566192627, "learning_rate": 5.13e-07, "loss": 0.0746, "step": 7475 }, { "epoch": 2.180705882352941, "grad_norm": 3.8321611881256104, "learning_rate": 5.104999999999999e-07, "loss": 0.0979, "step": 7480 }, { "epoch": 2.1812941176470586, "grad_norm": 2.6394224166870117, "learning_rate": 5.079999999999999e-07, "loss": 0.0733, "step": 7485 }, { "epoch": 2.1818823529411766, "grad_norm": 3.6768136024475098, "learning_rate": 5.055e-07, "loss": 0.0766, "step": 7490 }, { "epoch": 2.182470588235294, "grad_norm": 3.125209093093872, "learning_rate": 5.03e-07, "loss": 0.0598, "step": 7495 }, { "epoch": 2.1830588235294117, "grad_norm": 2.5445339679718018, "learning_rate": 5.004999999999999e-07, "loss": 0.0664, "step": 7500 }, { "epoch": 2.1830588235294117, "eval_loss": 0.141168475151062, "eval_runtime": 216.9968, "eval_samples_per_second": 8.125, "eval_steps_per_second": 2.032, "eval_wer": 8.595293694988323, "step": 7500 }, { "epoch": 2.1836470588235293, "grad_norm": 2.3832080364227295, "learning_rate": 4.979999999999999e-07, "loss": 0.0525, "step": 7505 }, { "epoch": 2.184235294117647, "grad_norm": 2.5988590717315674, "learning_rate": 4.955e-07, "loss": 0.0502, "step": 7510 }, { "epoch": 2.184823529411765, "grad_norm": 2.770667791366577, "learning_rate": 4.93e-07, "loss": 0.0788, "step": 7515 }, { "epoch": 2.1854117647058824, "grad_norm": 3.1413490772247314, "learning_rate": 4.905e-07, "loss": 0.0902, "step": 7520 }, { "epoch": 2.186, "grad_norm": 3.1688642501831055, "learning_rate": 4.879999999999999e-07, "loss": 0.096, "step": 7525 }, { "epoch": 2.1865882352941175, "grad_norm": 3.0078189373016357, "learning_rate": 4.854999999999999e-07, "loss": 0.1076, "step": 7530 }, { "epoch": 2.1871764705882355, "grad_norm": 3.890761375427246, "learning_rate": 4.83e-07, "loss": 0.1059, "step": 7535 }, { "epoch": 2.187764705882353, "grad_norm": 3.942343235015869, "learning_rate": 4.805e-07, "loss": 0.1097, "step": 7540 }, { "epoch": 2.1883529411764706, "grad_norm": 3.2998976707458496, "learning_rate": 4.779999999999999e-07, "loss": 0.0865, "step": 7545 }, { "epoch": 2.188941176470588, "grad_norm": 2.881542921066284, "learning_rate": 4.7549999999999994e-07, "loss": 0.0998, "step": 7550 }, { "epoch": 2.1895294117647057, "grad_norm": 3.820716381072998, "learning_rate": 4.7299999999999996e-07, "loss": 0.1117, "step": 7555 }, { "epoch": 2.1901176470588237, "grad_norm": 3.755279064178467, "learning_rate": 4.7049999999999993e-07, "loss": 0.0883, "step": 7560 }, { "epoch": 2.1907058823529413, "grad_norm": 4.218323230743408, "learning_rate": 4.68e-07, "loss": 0.1205, "step": 7565 }, { "epoch": 2.191294117647059, "grad_norm": 4.559285640716553, "learning_rate": 4.655e-07, "loss": 0.1185, "step": 7570 }, { "epoch": 2.1918823529411764, "grad_norm": 5.182135105133057, "learning_rate": 4.63e-07, "loss": 0.1429, "step": 7575 }, { "epoch": 2.192470588235294, "grad_norm": 4.2707014083862305, "learning_rate": 4.605e-07, "loss": 0.1169, "step": 7580 }, { "epoch": 2.193058823529412, "grad_norm": 3.5622217655181885, "learning_rate": 4.58e-07, "loss": 0.1044, "step": 7585 }, { "epoch": 2.1936470588235295, "grad_norm": 3.68119740486145, "learning_rate": 4.5549999999999997e-07, "loss": 0.1027, "step": 7590 }, { "epoch": 2.194235294117647, "grad_norm": 4.375702857971191, "learning_rate": 4.53e-07, "loss": 0.1125, "step": 7595 }, { "epoch": 2.1948235294117646, "grad_norm": 5.240574836730957, "learning_rate": 4.505e-07, "loss": 0.126, "step": 7600 }, { "epoch": 2.195411764705882, "grad_norm": 3.8292157649993896, "learning_rate": 4.48e-07, "loss": 0.1011, "step": 7605 }, { "epoch": 2.196, "grad_norm": 3.7407422065734863, "learning_rate": 4.455e-07, "loss": 0.1082, "step": 7610 }, { "epoch": 2.1965882352941177, "grad_norm": 3.601299285888672, "learning_rate": 4.43e-07, "loss": 0.1182, "step": 7615 }, { "epoch": 2.1971764705882353, "grad_norm": 4.453425407409668, "learning_rate": 4.405e-07, "loss": 0.112, "step": 7620 }, { "epoch": 2.197764705882353, "grad_norm": 3.592487096786499, "learning_rate": 4.38e-07, "loss": 0.1236, "step": 7625 }, { "epoch": 2.1983529411764704, "grad_norm": 4.056696891784668, "learning_rate": 4.355e-07, "loss": 0.1158, "step": 7630 }, { "epoch": 2.1989411764705884, "grad_norm": 4.749520301818848, "learning_rate": 4.3299999999999997e-07, "loss": 0.1062, "step": 7635 }, { "epoch": 2.199529411764706, "grad_norm": 4.881409168243408, "learning_rate": 4.305e-07, "loss": 0.1058, "step": 7640 }, { "epoch": 2.2001176470588235, "grad_norm": 4.620895862579346, "learning_rate": 4.2799999999999997e-07, "loss": 0.1435, "step": 7645 }, { "epoch": 2.200705882352941, "grad_norm": 5.204020977020264, "learning_rate": 4.255e-07, "loss": 0.1499, "step": 7650 }, { "epoch": 2.2012941176470586, "grad_norm": 5.232980251312256, "learning_rate": 4.2299999999999996e-07, "loss": 0.1421, "step": 7655 }, { "epoch": 2.2018823529411766, "grad_norm": 4.4692535400390625, "learning_rate": 4.205e-07, "loss": 0.1723, "step": 7660 }, { "epoch": 2.202470588235294, "grad_norm": 4.134629249572754, "learning_rate": 4.1799999999999996e-07, "loss": 0.1732, "step": 7665 }, { "epoch": 2.2030588235294117, "grad_norm": 5.042556285858154, "learning_rate": 4.155e-07, "loss": 0.1621, "step": 7670 }, { "epoch": 2.2036470588235293, "grad_norm": 5.022680759429932, "learning_rate": 4.1299999999999995e-07, "loss": 0.155, "step": 7675 }, { "epoch": 2.2042352941176473, "grad_norm": 4.982098579406738, "learning_rate": 4.105e-07, "loss": 0.1365, "step": 7680 }, { "epoch": 2.204823529411765, "grad_norm": 3.920809030532837, "learning_rate": 4.0799999999999995e-07, "loss": 0.1461, "step": 7685 }, { "epoch": 2.2054117647058824, "grad_norm": 5.073742866516113, "learning_rate": 4.055e-07, "loss": 0.1277, "step": 7690 }, { "epoch": 2.206, "grad_norm": 4.812915802001953, "learning_rate": 4.03e-07, "loss": 0.1407, "step": 7695 }, { "epoch": 2.2065882352941175, "grad_norm": 4.743094444274902, "learning_rate": 4.005e-07, "loss": 0.1254, "step": 7700 }, { "epoch": 2.2071764705882355, "grad_norm": 3.9826436042785645, "learning_rate": 3.98e-07, "loss": 0.1122, "step": 7705 }, { "epoch": 2.207764705882353, "grad_norm": 3.9094045162200928, "learning_rate": 3.955e-07, "loss": 0.1259, "step": 7710 }, { "epoch": 2.2083529411764706, "grad_norm": 4.459024906158447, "learning_rate": 3.93e-07, "loss": 0.1239, "step": 7715 }, { "epoch": 2.208941176470588, "grad_norm": 4.36275577545166, "learning_rate": 3.905e-07, "loss": 0.128, "step": 7720 }, { "epoch": 2.2095294117647057, "grad_norm": 5.086589336395264, "learning_rate": 3.88e-07, "loss": 0.1471, "step": 7725 }, { "epoch": 2.2101176470588237, "grad_norm": 3.2869954109191895, "learning_rate": 3.855e-07, "loss": 0.0993, "step": 7730 }, { "epoch": 2.2107058823529413, "grad_norm": 3.411060094833374, "learning_rate": 3.83e-07, "loss": 0.1003, "step": 7735 }, { "epoch": 2.211294117647059, "grad_norm": 4.262996673583984, "learning_rate": 3.805e-07, "loss": 0.1115, "step": 7740 }, { "epoch": 2.2118823529411764, "grad_norm": 3.331576108932495, "learning_rate": 3.7799999999999997e-07, "loss": 0.1047, "step": 7745 }, { "epoch": 2.212470588235294, "grad_norm": 3.57690691947937, "learning_rate": 3.755e-07, "loss": 0.1108, "step": 7750 }, { "epoch": 2.213058823529412, "grad_norm": 4.488544464111328, "learning_rate": 3.7299999999999997e-07, "loss": 0.1239, "step": 7755 }, { "epoch": 2.2136470588235295, "grad_norm": 4.1181230545043945, "learning_rate": 3.705e-07, "loss": 0.1048, "step": 7760 }, { "epoch": 2.214235294117647, "grad_norm": 4.727904319763184, "learning_rate": 3.6799999999999996e-07, "loss": 0.1154, "step": 7765 }, { "epoch": 2.2148235294117646, "grad_norm": 5.384610652923584, "learning_rate": 3.655e-07, "loss": 0.1981, "step": 7770 }, { "epoch": 2.215411764705882, "grad_norm": 5.225000858306885, "learning_rate": 3.6299999999999995e-07, "loss": 0.1997, "step": 7775 }, { "epoch": 2.216, "grad_norm": 3.495725154876709, "learning_rate": 3.605e-07, "loss": 0.2318, "step": 7780 }, { "epoch": 2.2165882352941177, "grad_norm": 4.1302900314331055, "learning_rate": 3.5799999999999995e-07, "loss": 0.2093, "step": 7785 }, { "epoch": 2.2171764705882353, "grad_norm": 3.27402400970459, "learning_rate": 3.555e-07, "loss": 0.1846, "step": 7790 }, { "epoch": 2.217764705882353, "grad_norm": 3.989089012145996, "learning_rate": 3.5299999999999994e-07, "loss": 0.2112, "step": 7795 }, { "epoch": 2.2183529411764704, "grad_norm": 3.929708957672119, "learning_rate": 3.5049999999999997e-07, "loss": 0.2403, "step": 7800 }, { "epoch": 2.2189411764705884, "grad_norm": 3.236370086669922, "learning_rate": 3.4799999999999994e-07, "loss": 0.2343, "step": 7805 }, { "epoch": 2.219529411764706, "grad_norm": 3.854012966156006, "learning_rate": 3.4549999999999996e-07, "loss": 0.292, "step": 7810 }, { "epoch": 2.2201176470588235, "grad_norm": 4.311310291290283, "learning_rate": 3.43e-07, "loss": 0.2505, "step": 7815 }, { "epoch": 2.220705882352941, "grad_norm": 3.8308284282684326, "learning_rate": 3.405e-07, "loss": 0.2779, "step": 7820 }, { "epoch": 2.2212941176470586, "grad_norm": 4.07056188583374, "learning_rate": 3.38e-07, "loss": 0.2695, "step": 7825 }, { "epoch": 2.2218823529411766, "grad_norm": 3.7924697399139404, "learning_rate": 3.355e-07, "loss": 0.2448, "step": 7830 }, { "epoch": 2.222470588235294, "grad_norm": 3.118187189102173, "learning_rate": 3.33e-07, "loss": 0.212, "step": 7835 }, { "epoch": 2.2230588235294118, "grad_norm": 3.67928147315979, "learning_rate": 3.305e-07, "loss": 0.1731, "step": 7840 }, { "epoch": 2.2236470588235293, "grad_norm": 4.920979022979736, "learning_rate": 3.28e-07, "loss": 0.1843, "step": 7845 }, { "epoch": 2.224235294117647, "grad_norm": 4.600193023681641, "learning_rate": 3.255e-07, "loss": 0.1535, "step": 7850 }, { "epoch": 2.224823529411765, "grad_norm": 3.8479647636413574, "learning_rate": 3.23e-07, "loss": 0.1352, "step": 7855 }, { "epoch": 2.2254117647058824, "grad_norm": 3.907224655151367, "learning_rate": 3.205e-07, "loss": 0.1264, "step": 7860 }, { "epoch": 2.226, "grad_norm": 3.0915322303771973, "learning_rate": 3.18e-07, "loss": 0.1189, "step": 7865 }, { "epoch": 2.2265882352941175, "grad_norm": 3.639526128768921, "learning_rate": 3.155e-07, "loss": 0.1095, "step": 7870 }, { "epoch": 2.227176470588235, "grad_norm": 4.66115140914917, "learning_rate": 3.13e-07, "loss": 0.1396, "step": 7875 }, { "epoch": 2.227764705882353, "grad_norm": 4.545446872711182, "learning_rate": 3.105e-07, "loss": 0.1355, "step": 7880 }, { "epoch": 2.2283529411764706, "grad_norm": 4.419951915740967, "learning_rate": 3.08e-07, "loss": 0.1133, "step": 7885 }, { "epoch": 2.228941176470588, "grad_norm": 3.9223968982696533, "learning_rate": 3.055e-07, "loss": 0.1224, "step": 7890 }, { "epoch": 2.2295294117647058, "grad_norm": 3.7462503910064697, "learning_rate": 3.03e-07, "loss": 0.1097, "step": 7895 }, { "epoch": 2.2301176470588233, "grad_norm": 4.397787094116211, "learning_rate": 3.0049999999999997e-07, "loss": 0.2306, "step": 7900 }, { "epoch": 2.2307058823529413, "grad_norm": 6.273893356323242, "learning_rate": 2.98e-07, "loss": 0.1873, "step": 7905 }, { "epoch": 2.231294117647059, "grad_norm": 4.10307502746582, "learning_rate": 2.9549999999999997e-07, "loss": 0.2144, "step": 7910 }, { "epoch": 2.2318823529411764, "grad_norm": 3.6458494663238525, "learning_rate": 2.93e-07, "loss": 0.2559, "step": 7915 }, { "epoch": 2.232470588235294, "grad_norm": 3.807610511779785, "learning_rate": 2.9049999999999996e-07, "loss": 0.2239, "step": 7920 }, { "epoch": 2.2330588235294115, "grad_norm": 4.449942588806152, "learning_rate": 2.88e-07, "loss": 0.2549, "step": 7925 }, { "epoch": 2.2336470588235295, "grad_norm": 4.060321807861328, "learning_rate": 2.8549999999999996e-07, "loss": 0.2407, "step": 7930 }, { "epoch": 2.234235294117647, "grad_norm": 4.180230140686035, "learning_rate": 2.83e-07, "loss": 0.2546, "step": 7935 }, { "epoch": 2.2348235294117647, "grad_norm": 4.1969194412231445, "learning_rate": 2.805e-07, "loss": 0.237, "step": 7940 }, { "epoch": 2.235411764705882, "grad_norm": 4.042295932769775, "learning_rate": 2.7800000000000003e-07, "loss": 0.2444, "step": 7945 }, { "epoch": 2.2359999999999998, "grad_norm": 2.9939708709716797, "learning_rate": 2.755e-07, "loss": 0.2741, "step": 7950 }, { "epoch": 2.2365882352941178, "grad_norm": 3.8539421558380127, "learning_rate": 2.73e-07, "loss": 0.221, "step": 7955 }, { "epoch": 2.2371764705882353, "grad_norm": 4.01802921295166, "learning_rate": 2.705e-07, "loss": 0.2292, "step": 7960 }, { "epoch": 2.237764705882353, "grad_norm": 4.264819145202637, "learning_rate": 2.68e-07, "loss": 0.2169, "step": 7965 }, { "epoch": 2.2383529411764704, "grad_norm": 4.1075944900512695, "learning_rate": 2.655e-07, "loss": 0.2001, "step": 7970 }, { "epoch": 2.2389411764705884, "grad_norm": 3.378434658050537, "learning_rate": 2.63e-07, "loss": 0.1413, "step": 7975 }, { "epoch": 2.239529411764706, "grad_norm": 3.6400043964385986, "learning_rate": 2.605e-07, "loss": 0.15, "step": 7980 }, { "epoch": 2.2401176470588235, "grad_norm": 4.6861186027526855, "learning_rate": 2.58e-07, "loss": 0.151, "step": 7985 }, { "epoch": 2.240705882352941, "grad_norm": 4.246377944946289, "learning_rate": 2.555e-07, "loss": 0.1215, "step": 7990 }, { "epoch": 2.2412941176470587, "grad_norm": 3.2816288471221924, "learning_rate": 2.53e-07, "loss": 0.1083, "step": 7995 }, { "epoch": 2.2418823529411767, "grad_norm": 3.741589069366455, "learning_rate": 2.5049999999999997e-07, "loss": 0.1528, "step": 8000 }, { "epoch": 2.2418823529411767, "eval_loss": 0.1381354033946991, "eval_runtime": 216.0909, "eval_samples_per_second": 8.159, "eval_steps_per_second": 2.041, "eval_wer": 8.810849649721574, "step": 8000 }, { "epoch": 2.242470588235294, "grad_norm": 3.987574577331543, "learning_rate": 2.48e-07, "loss": 0.1116, "step": 8005 }, { "epoch": 2.2430588235294118, "grad_norm": 4.041168689727783, "learning_rate": 2.4549999999999997e-07, "loss": 0.1071, "step": 8010 }, { "epoch": 2.2436470588235293, "grad_norm": 3.152656316757202, "learning_rate": 2.43e-07, "loss": 0.0955, "step": 8015 }, { "epoch": 2.244235294117647, "grad_norm": 4.692921161651611, "learning_rate": 2.4049999999999996e-07, "loss": 0.0966, "step": 8020 }, { "epoch": 2.244823529411765, "grad_norm": 5.015964508056641, "learning_rate": 2.38e-07, "loss": 0.1121, "step": 8025 }, { "epoch": 2.2454117647058824, "grad_norm": 3.540560483932495, "learning_rate": 2.3549999999999998e-07, "loss": 0.1057, "step": 8030 }, { "epoch": 2.246, "grad_norm": 3.7348451614379883, "learning_rate": 2.33e-07, "loss": 0.0951, "step": 8035 }, { "epoch": 2.2465882352941176, "grad_norm": 4.229401588439941, "learning_rate": 2.305e-07, "loss": 0.1237, "step": 8040 }, { "epoch": 2.247176470588235, "grad_norm": 3.671140193939209, "learning_rate": 2.28e-07, "loss": 0.1062, "step": 8045 }, { "epoch": 2.247764705882353, "grad_norm": 4.229209899902344, "learning_rate": 2.255e-07, "loss": 0.1073, "step": 8050 }, { "epoch": 2.2483529411764707, "grad_norm": 4.739395618438721, "learning_rate": 2.23e-07, "loss": 0.104, "step": 8055 }, { "epoch": 2.248941176470588, "grad_norm": 4.503268241882324, "learning_rate": 2.205e-07, "loss": 0.1176, "step": 8060 }, { "epoch": 2.2495294117647058, "grad_norm": 4.381680965423584, "learning_rate": 2.18e-07, "loss": 0.1101, "step": 8065 }, { "epoch": 2.2501176470588238, "grad_norm": 3.2998883724212646, "learning_rate": 2.155e-07, "loss": 0.1096, "step": 8070 }, { "epoch": 2.2507058823529413, "grad_norm": 3.1615946292877197, "learning_rate": 2.13e-07, "loss": 0.1153, "step": 8075 }, { "epoch": 2.251294117647059, "grad_norm": 4.538625240325928, "learning_rate": 2.1049999999999999e-07, "loss": 0.0896, "step": 8080 }, { "epoch": 2.2518823529411764, "grad_norm": 3.3750851154327393, "learning_rate": 2.0799999999999998e-07, "loss": 0.1088, "step": 8085 }, { "epoch": 2.252470588235294, "grad_norm": 2.829025983810425, "learning_rate": 2.0549999999999998e-07, "loss": 0.0773, "step": 8090 }, { "epoch": 2.253058823529412, "grad_norm": 2.83367919921875, "learning_rate": 2.03e-07, "loss": 0.0815, "step": 8095 }, { "epoch": 2.2536470588235296, "grad_norm": 3.252626895904541, "learning_rate": 2.005e-07, "loss": 0.0785, "step": 8100 }, { "epoch": 2.254235294117647, "grad_norm": 2.986762523651123, "learning_rate": 1.98e-07, "loss": 0.0827, "step": 8105 }, { "epoch": 2.2548235294117647, "grad_norm": 2.8114709854125977, "learning_rate": 1.955e-07, "loss": 0.0812, "step": 8110 }, { "epoch": 2.2554117647058822, "grad_norm": 2.9283883571624756, "learning_rate": 1.93e-07, "loss": 0.0887, "step": 8115 }, { "epoch": 2.2560000000000002, "grad_norm": 4.205686092376709, "learning_rate": 1.905e-07, "loss": 0.1397, "step": 8120 }, { "epoch": 2.256588235294118, "grad_norm": 3.9014127254486084, "learning_rate": 1.88e-07, "loss": 0.1542, "step": 8125 }, { "epoch": 2.2571764705882353, "grad_norm": 4.175537109375, "learning_rate": 1.855e-07, "loss": 0.2408, "step": 8130 }, { "epoch": 2.257764705882353, "grad_norm": 4.330829620361328, "learning_rate": 1.8299999999999998e-07, "loss": 0.2259, "step": 8135 }, { "epoch": 2.2583529411764705, "grad_norm": 4.12072229385376, "learning_rate": 1.8049999999999998e-07, "loss": 0.1503, "step": 8140 }, { "epoch": 2.2589411764705885, "grad_norm": 4.38078498840332, "learning_rate": 1.7799999999999998e-07, "loss": 0.1916, "step": 8145 }, { "epoch": 2.259529411764706, "grad_norm": 4.417450428009033, "learning_rate": 1.7549999999999998e-07, "loss": 0.2342, "step": 8150 }, { "epoch": 2.2601176470588236, "grad_norm": 4.579851150512695, "learning_rate": 1.7299999999999997e-07, "loss": 0.2008, "step": 8155 }, { "epoch": 2.260705882352941, "grad_norm": 4.479092121124268, "learning_rate": 1.705e-07, "loss": 0.2651, "step": 8160 }, { "epoch": 2.2612941176470587, "grad_norm": 3.3638620376586914, "learning_rate": 1.68e-07, "loss": 0.2117, "step": 8165 }, { "epoch": 2.2618823529411767, "grad_norm": 3.553847551345825, "learning_rate": 1.655e-07, "loss": 0.1889, "step": 8170 }, { "epoch": 2.2624705882352942, "grad_norm": 3.6857898235321045, "learning_rate": 1.63e-07, "loss": 0.2127, "step": 8175 }, { "epoch": 2.263058823529412, "grad_norm": 4.307986259460449, "learning_rate": 1.605e-07, "loss": 0.2245, "step": 8180 }, { "epoch": 2.2636470588235293, "grad_norm": 4.547567367553711, "learning_rate": 1.5799999999999999e-07, "loss": 0.2024, "step": 8185 }, { "epoch": 2.264235294117647, "grad_norm": 4.1776323318481445, "learning_rate": 1.5549999999999998e-07, "loss": 0.2338, "step": 8190 }, { "epoch": 2.264823529411765, "grad_norm": 4.6070942878723145, "learning_rate": 1.5299999999999998e-07, "loss": 0.2079, "step": 8195 }, { "epoch": 2.2654117647058825, "grad_norm": 5.136123180389404, "learning_rate": 1.5049999999999998e-07, "loss": 0.1966, "step": 8200 }, { "epoch": 2.266, "grad_norm": 4.70625638961792, "learning_rate": 1.4799999999999998e-07, "loss": 0.172, "step": 8205 }, { "epoch": 2.2665882352941176, "grad_norm": 3.954213857650757, "learning_rate": 1.4549999999999997e-07, "loss": 0.1583, "step": 8210 }, { "epoch": 2.267176470588235, "grad_norm": 2.96557354927063, "learning_rate": 1.4299999999999997e-07, "loss": 0.1434, "step": 8215 }, { "epoch": 2.267764705882353, "grad_norm": 3.305049419403076, "learning_rate": 1.4050000000000002e-07, "loss": 0.136, "step": 8220 }, { "epoch": 2.2683529411764707, "grad_norm": 3.86317777633667, "learning_rate": 1.3800000000000002e-07, "loss": 0.135, "step": 8225 }, { "epoch": 2.2689411764705882, "grad_norm": 3.89882493019104, "learning_rate": 1.3550000000000002e-07, "loss": 0.1255, "step": 8230 }, { "epoch": 2.269529411764706, "grad_norm": 3.555644989013672, "learning_rate": 1.33e-07, "loss": 0.178, "step": 8235 }, { "epoch": 2.2701176470588234, "grad_norm": 4.114537239074707, "learning_rate": 1.305e-07, "loss": 0.1471, "step": 8240 }, { "epoch": 2.2707058823529414, "grad_norm": 3.7743608951568604, "learning_rate": 1.28e-07, "loss": 0.1706, "step": 8245 }, { "epoch": 2.271294117647059, "grad_norm": 3.5714948177337646, "learning_rate": 1.255e-07, "loss": 0.1553, "step": 8250 }, { "epoch": 2.2718823529411765, "grad_norm": 2.783324718475342, "learning_rate": 1.23e-07, "loss": 0.121, "step": 8255 }, { "epoch": 2.272470588235294, "grad_norm": 3.0309135913848877, "learning_rate": 1.205e-07, "loss": 0.2184, "step": 8260 }, { "epoch": 2.2730588235294116, "grad_norm": 4.133063793182373, "learning_rate": 1.1799999999999998e-07, "loss": 0.1842, "step": 8265 }, { "epoch": 2.2736470588235296, "grad_norm": 3.247187614440918, "learning_rate": 1.155e-07, "loss": 0.1479, "step": 8270 }, { "epoch": 2.274235294117647, "grad_norm": 2.9657161235809326, "learning_rate": 1.1299999999999999e-07, "loss": 0.1364, "step": 8275 }, { "epoch": 2.2748235294117647, "grad_norm": 3.9024393558502197, "learning_rate": 1.1049999999999999e-07, "loss": 0.1685, "step": 8280 }, { "epoch": 2.2754117647058822, "grad_norm": 4.209234237670898, "learning_rate": 1.0799999999999999e-07, "loss": 0.2663, "step": 8285 }, { "epoch": 2.276, "grad_norm": 4.061594486236572, "learning_rate": 1.0549999999999999e-07, "loss": 0.2855, "step": 8290 }, { "epoch": 2.276588235294118, "grad_norm": 4.532535076141357, "learning_rate": 1.03e-07, "loss": 0.2181, "step": 8295 }, { "epoch": 2.2771764705882354, "grad_norm": 4.0615997314453125, "learning_rate": 1.005e-07, "loss": 0.2505, "step": 8300 }, { "epoch": 2.277764705882353, "grad_norm": 4.223742961883545, "learning_rate": 9.8e-08, "loss": 0.278, "step": 8305 }, { "epoch": 2.2783529411764705, "grad_norm": 3.851405620574951, "learning_rate": 9.55e-08, "loss": 0.224, "step": 8310 }, { "epoch": 2.278941176470588, "grad_norm": 4.128208637237549, "learning_rate": 9.3e-08, "loss": 0.2451, "step": 8315 }, { "epoch": 2.279529411764706, "grad_norm": 3.7283284664154053, "learning_rate": 9.05e-08, "loss": 0.2471, "step": 8320 }, { "epoch": 2.2801176470588236, "grad_norm": 4.319870948791504, "learning_rate": 8.8e-08, "loss": 0.2664, "step": 8325 }, { "epoch": 2.280705882352941, "grad_norm": 3.5775504112243652, "learning_rate": 8.55e-08, "loss": 0.2436, "step": 8330 }, { "epoch": 2.2812941176470587, "grad_norm": 3.7753541469573975, "learning_rate": 8.3e-08, "loss": 0.2506, "step": 8335 }, { "epoch": 2.2818823529411763, "grad_norm": 3.617588996887207, "learning_rate": 8.05e-08, "loss": 0.2089, "step": 8340 }, { "epoch": 2.2824705882352943, "grad_norm": 3.596907138824463, "learning_rate": 7.8e-08, "loss": 0.196, "step": 8345 }, { "epoch": 2.283058823529412, "grad_norm": 3.489818811416626, "learning_rate": 7.55e-08, "loss": 0.1665, "step": 8350 }, { "epoch": 2.2836470588235294, "grad_norm": 2.8996243476867676, "learning_rate": 7.299999999999999e-08, "loss": 0.1693, "step": 8355 }, { "epoch": 2.284235294117647, "grad_norm": 2.951594829559326, "learning_rate": 7.049999999999999e-08, "loss": 0.128, "step": 8360 }, { "epoch": 2.2848235294117645, "grad_norm": 3.7118210792541504, "learning_rate": 6.8e-08, "loss": 0.1535, "step": 8365 }, { "epoch": 2.2854117647058825, "grad_norm": 3.585965394973755, "learning_rate": 6.55e-08, "loss": 0.161, "step": 8370 }, { "epoch": 2.286, "grad_norm": 3.9355804920196533, "learning_rate": 6.3e-08, "loss": 0.1709, "step": 8375 }, { "epoch": 2.2865882352941176, "grad_norm": 3.9825756549835205, "learning_rate": 6.049999999999999e-08, "loss": 0.1859, "step": 8380 }, { "epoch": 2.287176470588235, "grad_norm": 3.661153554916382, "learning_rate": 5.8e-08, "loss": 0.1728, "step": 8385 }, { "epoch": 2.2877647058823527, "grad_norm": 3.520698070526123, "learning_rate": 5.55e-08, "loss": 0.1867, "step": 8390 }, { "epoch": 2.2883529411764707, "grad_norm": 3.3956141471862793, "learning_rate": 5.3e-08, "loss": 0.1963, "step": 8395 }, { "epoch": 2.2889411764705883, "grad_norm": 3.884486675262451, "learning_rate": 5.05e-08, "loss": 0.2071, "step": 8400 }, { "epoch": 2.289529411764706, "grad_norm": 3.0959761142730713, "learning_rate": 4.8e-08, "loss": 0.1706, "step": 8405 }, { "epoch": 2.2901176470588234, "grad_norm": 3.5418519973754883, "learning_rate": 4.55e-08, "loss": 0.1755, "step": 8410 }, { "epoch": 2.2907058823529414, "grad_norm": 3.603569746017456, "learning_rate": 4.2999999999999995e-08, "loss": 0.1646, "step": 8415 }, { "epoch": 2.291294117647059, "grad_norm": 4.5530805587768555, "learning_rate": 4.05e-08, "loss": 0.1541, "step": 8420 }, { "epoch": 2.2918823529411765, "grad_norm": 4.5351433753967285, "learning_rate": 3.7999999999999996e-08, "loss": 0.1469, "step": 8425 }, { "epoch": 2.292470588235294, "grad_norm": 3.7660880088806152, "learning_rate": 3.5499999999999994e-08, "loss": 0.1378, "step": 8430 }, { "epoch": 2.2930588235294116, "grad_norm": 3.410444736480713, "learning_rate": 3.3e-08, "loss": 0.1244, "step": 8435 }, { "epoch": 2.2936470588235296, "grad_norm": 2.6776397228240967, "learning_rate": 3.0499999999999995e-08, "loss": 0.0996, "step": 8440 }, { "epoch": 2.294235294117647, "grad_norm": 3.839097023010254, "learning_rate": 2.8e-08, "loss": 0.112, "step": 8445 }, { "epoch": 2.2948235294117647, "grad_norm": 4.047727584838867, "learning_rate": 2.5499999999999997e-08, "loss": 0.1046, "step": 8450 }, { "epoch": 2.2954117647058823, "grad_norm": 4.468398094177246, "learning_rate": 2.2999999999999998e-08, "loss": 0.1243, "step": 8455 }, { "epoch": 2.296, "grad_norm": 3.8060684204101562, "learning_rate": 2.05e-08, "loss": 0.1245, "step": 8460 }, { "epoch": 2.296588235294118, "grad_norm": 3.8455605506896973, "learning_rate": 1.8e-08, "loss": 0.1154, "step": 8465 }, { "epoch": 2.2971764705882354, "grad_norm": 3.9797637462615967, "learning_rate": 1.55e-08, "loss": 0.1142, "step": 8470 }, { "epoch": 2.297764705882353, "grad_norm": 3.6920835971832275, "learning_rate": 1.2999999999999999e-08, "loss": 0.1212, "step": 8475 }, { "epoch": 2.2983529411764705, "grad_norm": 3.2424216270446777, "learning_rate": 1.05e-08, "loss": 0.1087, "step": 8480 }, { "epoch": 2.298941176470588, "grad_norm": 3.7677266597747803, "learning_rate": 8e-09, "loss": 0.1249, "step": 8485 }, { "epoch": 2.299529411764706, "grad_norm": 4.102116107940674, "learning_rate": 5.5e-09, "loss": 0.1069, "step": 8490 }, { "epoch": 2.3001176470588236, "grad_norm": 4.839325428009033, "learning_rate": 3e-09, "loss": 0.1128, "step": 8495 }, { "epoch": 2.300705882352941, "grad_norm": 4.124351501464844, "learning_rate": 5e-10, "loss": 0.1186, "step": 8500 }, { "epoch": 2.300705882352941, "eval_loss": 0.13785524666309357, "eval_runtime": 217.0311, "eval_samples_per_second": 8.123, "eval_steps_per_second": 2.032, "eval_wer": 8.73899766481049, "step": 8500 } ], "logging_steps": 5, "max_steps": 8500, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.638149115641856e+20, "train_batch_size": 4, "trial_name": null, "trial_params": null }