{
  "best_metric": 0.36100757122039795,
  "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-combined-30hrs-model/checkpoint-2400",
  "epoch": 1.2421608196212357,
  "eval_steps": 200,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01034875297526648,
      "grad_norm": 139.34642028808594,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 10.2474,
      "step": 25
    },
    {
      "epoch": 0.02069750595053296,
      "grad_norm": 89.0820541381836,
      "learning_rate": 9.000000000000001e-07,
      "loss": 8.2652,
      "step": 50
    },
    {
      "epoch": 0.03104625892579944,
      "grad_norm": 85.51993560791016,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 6.0026,
      "step": 75
    },
    {
      "epoch": 0.04139501190106592,
      "grad_norm": 91.87001037597656,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 4.5811,
      "step": 100
    },
    {
      "epoch": 0.0517437648763324,
      "grad_norm": 79.70684814453125,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 3.2507,
      "step": 125
    },
    {
      "epoch": 0.06209251785159888,
      "grad_norm": 72.15753936767578,
      "learning_rate": 2.9e-06,
      "loss": 3.2154,
      "step": 150
    },
    {
      "epoch": 0.07244127082686536,
      "grad_norm": 60.531593322753906,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 2.8239,
      "step": 175
    },
    {
      "epoch": 0.08279002380213184,
      "grad_norm": 60.771507263183594,
      "learning_rate": 3.900000000000001e-06,
      "loss": 2.7508,
      "step": 200
    },
    {
      "epoch": 0.08279002380213184,
      "eval_loss": 0.8133686184883118,
      "eval_runtime": 566.5007,
      "eval_samples_per_second": 1.919,
      "eval_steps_per_second": 0.96,
      "eval_wer": 0.4877232142857143,
      "step": 200
    },
    {
      "epoch": 0.09313877677739832,
      "grad_norm": 51.035518646240234,
      "learning_rate": 4.4e-06,
      "loss": 2.2578,
      "step": 225
    },
    {
      "epoch": 0.1034875297526648,
      "grad_norm": 69.9592514038086,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 2.4931,
      "step": 250
    },
    {
      "epoch": 0.11383628272793128,
      "grad_norm": 55.8157844543457,
      "learning_rate": 5.400000000000001e-06,
      "loss": 2.2654,
      "step": 275
    },
    {
      "epoch": 0.12418503570319776,
      "grad_norm": 57.47214889526367,
      "learning_rate": 5.9e-06,
      "loss": 2.1863,
      "step": 300
    },
    {
      "epoch": 0.13453378867846424,
      "grad_norm": 48.41241455078125,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 2.1504,
      "step": 325
    },
    {
      "epoch": 0.1448825416537307,
      "grad_norm": 61.10322570800781,
      "learning_rate": 6.9e-06,
      "loss": 2.1106,
      "step": 350
    },
    {
      "epoch": 0.1552312946289972,
      "grad_norm": 49.85371017456055,
      "learning_rate": 7.4e-06,
      "loss": 1.9696,
      "step": 375
    },
    {
      "epoch": 0.16558004760426367,
      "grad_norm": 78.61709594726562,
      "learning_rate": 7.9e-06,
      "loss": 1.8748,
      "step": 400
    },
    {
      "epoch": 0.16558004760426367,
      "eval_loss": 0.6291218996047974,
      "eval_runtime": 561.4046,
      "eval_samples_per_second": 1.936,
      "eval_steps_per_second": 0.969,
      "eval_wer": 0.3898133116883117,
      "step": 400
    },
    {
      "epoch": 0.17592880057953017,
      "grad_norm": 58.551658630371094,
      "learning_rate": 8.400000000000001e-06,
      "loss": 1.848,
      "step": 425
    },
    {
      "epoch": 0.18627755355479664,
      "grad_norm": 37.77573013305664,
      "learning_rate": 8.900000000000001e-06,
      "loss": 1.9659,
      "step": 450
    },
    {
      "epoch": 0.19662630653006313,
      "grad_norm": 67.77395629882812,
      "learning_rate": 9.4e-06,
      "loss": 2.0312,
      "step": 475
    },
    {
      "epoch": 0.2069750595053296,
      "grad_norm": 48.11214828491211,
      "learning_rate": 9.9e-06,
      "loss": 1.795,
      "step": 500
    },
    {
      "epoch": 0.2173238124805961,
      "grad_norm": 54.6898307800293,
      "learning_rate": 9.997220291869355e-06,
      "loss": 1.8759,
      "step": 525
    },
    {
      "epoch": 0.22767256545586256,
      "grad_norm": 56.6167106628418,
      "learning_rate": 9.993745656706047e-06,
      "loss": 1.9263,
      "step": 550
    },
    {
      "epoch": 0.23802131843112906,
      "grad_norm": 43.32560729980469,
      "learning_rate": 9.990271021542739e-06,
      "loss": 1.889,
      "step": 575
    },
    {
      "epoch": 0.24837007140639553,
      "grad_norm": 49.59733581542969,
      "learning_rate": 9.986796386379432e-06,
      "loss": 1.6214,
      "step": 600
    },
    {
      "epoch": 0.24837007140639553,
      "eval_loss": 0.5559969544410706,
      "eval_runtime": 561.0579,
      "eval_samples_per_second": 1.937,
      "eval_steps_per_second": 0.97,
      "eval_wer": 0.3431412337662338,
      "step": 600
    },
    {
      "epoch": 0.258718824381662,
      "grad_norm": 40.30105972290039,
      "learning_rate": 9.983321751216123e-06,
      "loss": 1.6189,
      "step": 625
    },
    {
      "epoch": 0.2690675773569285,
      "grad_norm": 42.67938995361328,
      "learning_rate": 9.979847116052815e-06,
      "loss": 1.3983,
      "step": 650
    },
    {
      "epoch": 0.279416330332195,
      "grad_norm": 36.10395431518555,
      "learning_rate": 9.976372480889508e-06,
      "loss": 1.6815,
      "step": 675
    },
    {
      "epoch": 0.2897650833074614,
      "grad_norm": 42.29354476928711,
      "learning_rate": 9.9728978457262e-06,
      "loss": 1.5022,
      "step": 700
    },
    {
      "epoch": 0.3001138362827279,
      "grad_norm": 50.72279357910156,
      "learning_rate": 9.969423210562891e-06,
      "loss": 1.5644,
      "step": 725
    },
    {
      "epoch": 0.3104625892579944,
      "grad_norm": 49.18547821044922,
      "learning_rate": 9.965948575399585e-06,
      "loss": 1.6269,
      "step": 750
    },
    {
      "epoch": 0.3208113422332609,
      "grad_norm": 42.0450325012207,
      "learning_rate": 9.962473940236276e-06,
      "loss": 1.4277,
      "step": 775
    },
    {
      "epoch": 0.33116009520852735,
      "grad_norm": 58.25082015991211,
      "learning_rate": 9.958999305072968e-06,
      "loss": 1.559,
      "step": 800
    },
    {
      "epoch": 0.33116009520852735,
      "eval_loss": 0.49684250354766846,
      "eval_runtime": 559.6818,
      "eval_samples_per_second": 1.942,
      "eval_steps_per_second": 0.972,
      "eval_wer": 0.29525162337662336,
      "step": 800
    },
    {
      "epoch": 0.34150884818379384,
      "grad_norm": 37.699710845947266,
      "learning_rate": 9.955524669909661e-06,
      "loss": 1.2958,
      "step": 825
    },
    {
      "epoch": 0.35185760115906034,
      "grad_norm": 30.309236526489258,
      "learning_rate": 9.952050034746353e-06,
      "loss": 1.4174,
      "step": 850
    },
    {
      "epoch": 0.36220635413432684,
      "grad_norm": 43.847652435302734,
      "learning_rate": 9.948575399583044e-06,
      "loss": 1.5185,
      "step": 875
    },
    {
      "epoch": 0.3725551071095933,
      "grad_norm": 57.50359344482422,
      "learning_rate": 9.945100764419738e-06,
      "loss": 1.5072,
      "step": 900
    },
    {
      "epoch": 0.38290386008485977,
      "grad_norm": 43.106422424316406,
      "learning_rate": 9.941626129256429e-06,
      "loss": 1.4437,
      "step": 925
    },
    {
      "epoch": 0.39325261306012627,
      "grad_norm": 38.97978591918945,
      "learning_rate": 9.93815149409312e-06,
      "loss": 1.4267,
      "step": 950
    },
    {
      "epoch": 0.40360136603539276,
      "grad_norm": 39.729026794433594,
      "learning_rate": 9.934676858929814e-06,
      "loss": 1.4166,
      "step": 975
    },
    {
      "epoch": 0.4139501190106592,
      "grad_norm": 45.119964599609375,
      "learning_rate": 9.931202223766506e-06,
      "loss": 1.3616,
      "step": 1000
    },
    {
      "epoch": 0.4139501190106592,
      "eval_loss": 0.4720439910888672,
      "eval_runtime": 559.355,
      "eval_samples_per_second": 1.943,
      "eval_steps_per_second": 0.973,
      "eval_wer": 0.2872362012987013,
      "step": 1000
    },
    {
      "epoch": 0.4242988719859257,
      "grad_norm": 39.95614242553711,
      "learning_rate": 9.927727588603197e-06,
      "loss": 1.4632,
      "step": 1025
    },
    {
      "epoch": 0.4346476249611922,
      "grad_norm": 26.40887451171875,
      "learning_rate": 9.92425295343989e-06,
      "loss": 1.2984,
      "step": 1050
    },
    {
      "epoch": 0.44499637793645863,
      "grad_norm": 42.91450500488281,
      "learning_rate": 9.920778318276582e-06,
      "loss": 1.5535,
      "step": 1075
    },
    {
      "epoch": 0.4553451309117251,
      "grad_norm": 61.96196746826172,
      "learning_rate": 9.917303683113274e-06,
      "loss": 1.2644,
      "step": 1100
    },
    {
      "epoch": 0.4656938838869916,
      "grad_norm": 54.67252731323242,
      "learning_rate": 9.913829047949967e-06,
      "loss": 1.1995,
      "step": 1125
    },
    {
      "epoch": 0.4760426368622581,
      "grad_norm": 46.1182975769043,
      "learning_rate": 9.910354412786658e-06,
      "loss": 1.5119,
      "step": 1150
    },
    {
      "epoch": 0.48639138983752456,
      "grad_norm": 43.22040557861328,
      "learning_rate": 9.90687977762335e-06,
      "loss": 1.244,
      "step": 1175
    },
    {
      "epoch": 0.49674014281279105,
      "grad_norm": 45.609161376953125,
      "learning_rate": 9.903405142460043e-06,
      "loss": 1.3078,
      "step": 1200
    },
    {
      "epoch": 0.49674014281279105,
      "eval_loss": 0.4577382802963257,
      "eval_runtime": 550.5837,
      "eval_samples_per_second": 1.974,
      "eval_steps_per_second": 0.988,
      "eval_wer": 0.29778814935064934,
      "step": 1200
    },
    {
      "epoch": 0.5070888957880575,
      "grad_norm": 57.24745178222656,
      "learning_rate": 9.899930507296735e-06,
      "loss": 1.2625,
      "step": 1225
    },
    {
      "epoch": 0.517437648763324,
      "grad_norm": 29.896320343017578,
      "learning_rate": 9.896455872133426e-06,
      "loss": 1.2438,
      "step": 1250
    },
    {
      "epoch": 0.5277864017385905,
      "grad_norm": 56.462646484375,
      "learning_rate": 9.89298123697012e-06,
      "loss": 1.3759,
      "step": 1275
    },
    {
      "epoch": 0.538135154713857,
      "grad_norm": 39.78219985961914,
      "learning_rate": 9.889506601806811e-06,
      "loss": 1.3728,
      "step": 1300
    },
    {
      "epoch": 0.5484839076891235,
      "grad_norm": 45.62682342529297,
      "learning_rate": 9.886031966643503e-06,
      "loss": 1.3165,
      "step": 1325
    },
    {
      "epoch": 0.55883266066439,
      "grad_norm": 41.924949645996094,
      "learning_rate": 9.882557331480196e-06,
      "loss": 1.3968,
      "step": 1350
    },
    {
      "epoch": 0.5691814136396565,
      "grad_norm": 39.86012649536133,
      "learning_rate": 9.879082696316888e-06,
      "loss": 1.1669,
      "step": 1375
    },
    {
      "epoch": 0.5795301666149228,
      "grad_norm": 30.276840209960938,
      "learning_rate": 9.87560806115358e-06,
      "loss": 1.2579,
      "step": 1400
    },
    {
      "epoch": 0.5795301666149228,
      "eval_loss": 0.42178860306739807,
      "eval_runtime": 565.0754,
      "eval_samples_per_second": 1.924,
      "eval_steps_per_second": 0.963,
      "eval_wer": 0.2757711038961039,
      "step": 1400
    },
    {
      "epoch": 0.5898789195901893,
      "grad_norm": 27.80983543395996,
      "learning_rate": 9.872133425990272e-06,
      "loss": 1.1973,
      "step": 1425
    },
    {
      "epoch": 0.6002276725654558,
      "grad_norm": 54.13436508178711,
      "learning_rate": 9.868658790826964e-06,
      "loss": 1.3214,
      "step": 1450
    },
    {
      "epoch": 0.6105764255407223,
      "grad_norm": 31.381301879882812,
      "learning_rate": 9.865184155663656e-06,
      "loss": 1.3413,
      "step": 1475
    },
    {
      "epoch": 0.6209251785159888,
      "grad_norm": 35.9166259765625,
      "learning_rate": 9.861709520500349e-06,
      "loss": 1.2117,
      "step": 1500
    },
    {
      "epoch": 0.6312739314912553,
      "grad_norm": 37.55325698852539,
      "learning_rate": 9.85823488533704e-06,
      "loss": 1.2422,
      "step": 1525
    },
    {
      "epoch": 0.6416226844665218,
      "grad_norm": 32.475807189941406,
      "learning_rate": 9.854760250173732e-06,
      "loss": 1.2666,
      "step": 1550
    },
    {
      "epoch": 0.6519714374417883,
      "grad_norm": 35.703243255615234,
      "learning_rate": 9.851285615010425e-06,
      "loss": 1.1171,
      "step": 1575
    },
    {
      "epoch": 0.6623201904170547,
      "grad_norm": 41.8420524597168,
      "learning_rate": 9.847810979847117e-06,
      "loss": 1.214,
      "step": 1600
    },
    {
      "epoch": 0.6623201904170547,
      "eval_loss": 0.41559898853302,
      "eval_runtime": 555.4115,
      "eval_samples_per_second": 1.957,
      "eval_steps_per_second": 0.979,
      "eval_wer": 0.26542207792207795,
      "step": 1600
    },
    {
      "epoch": 0.6726689433923212,
      "grad_norm": 38.391361236572266,
      "learning_rate": 9.844336344683808e-06,
      "loss": 1.2246,
      "step": 1625
    },
    {
      "epoch": 0.6830176963675877,
      "grad_norm": 57.34391784667969,
      "learning_rate": 9.840861709520502e-06,
      "loss": 1.0536,
      "step": 1650
    },
    {
      "epoch": 0.6933664493428542,
      "grad_norm": 19.72743034362793,
      "learning_rate": 9.837387074357193e-06,
      "loss": 1.2277,
      "step": 1675
    },
    {
      "epoch": 0.7037152023181207,
      "grad_norm": 38.91509246826172,
      "learning_rate": 9.833912439193885e-06,
      "loss": 1.2657,
      "step": 1700
    },
    {
      "epoch": 0.7140639552933872,
      "grad_norm": 51.42982864379883,
      "learning_rate": 9.830437804030578e-06,
      "loss": 1.2865,
      "step": 1725
    },
    {
      "epoch": 0.7244127082686537,
      "grad_norm": 37.256927490234375,
      "learning_rate": 9.82696316886727e-06,
      "loss": 1.1125,
      "step": 1750
    },
    {
      "epoch": 0.73476146124392,
      "grad_norm": 37.06630325317383,
      "learning_rate": 9.823488533703961e-06,
      "loss": 1.2179,
      "step": 1775
    },
    {
      "epoch": 0.7451102142191866,
      "grad_norm": 28.06951332092285,
      "learning_rate": 9.820013898540655e-06,
      "loss": 1.0719,
      "step": 1800
    },
    {
      "epoch": 0.7451102142191866,
      "eval_loss": 0.40048521757125854,
      "eval_runtime": 544.8903,
      "eval_samples_per_second": 1.995,
      "eval_steps_per_second": 0.998,
      "eval_wer": 0.2315340909090909,
      "step": 1800
    },
    {
      "epoch": 0.755458967194453,
      "grad_norm": 43.74811553955078,
      "learning_rate": 9.816539263377346e-06,
      "loss": 1.1466,
      "step": 1825
    },
    {
      "epoch": 0.7658077201697195,
      "grad_norm": 67.86227416992188,
      "learning_rate": 9.813064628214038e-06,
      "loss": 1.2406,
      "step": 1850
    },
    {
      "epoch": 0.776156473144986,
      "grad_norm": 21.888675689697266,
      "learning_rate": 9.809589993050731e-06,
      "loss": 1.0934,
      "step": 1875
    },
    {
      "epoch": 0.7865052261202525,
      "grad_norm": 34.76344680786133,
      "learning_rate": 9.806115357887423e-06,
      "loss": 1.3778,
      "step": 1900
    },
    {
      "epoch": 0.796853979095519,
      "grad_norm": 37.02476501464844,
      "learning_rate": 9.802640722724114e-06,
      "loss": 1.1266,
      "step": 1925
    },
    {
      "epoch": 0.8072027320707855,
      "grad_norm": 25.008710861206055,
      "learning_rate": 9.799166087560807e-06,
      "loss": 0.9663,
      "step": 1950
    },
    {
      "epoch": 0.8175514850460519,
      "grad_norm": 43.19198989868164,
      "learning_rate": 9.795691452397499e-06,
      "loss": 1.2192,
      "step": 1975
    },
    {
      "epoch": 0.8279002380213184,
      "grad_norm": 42.85536193847656,
      "learning_rate": 9.79221681723419e-06,
      "loss": 1.0432,
      "step": 2000
    },
    {
      "epoch": 0.8279002380213184,
      "eval_loss": 0.3864189684391022,
      "eval_runtime": 555.4403,
      "eval_samples_per_second": 1.957,
      "eval_steps_per_second": 0.979,
      "eval_wer": 0.24330357142857142,
      "step": 2000
    },
    {
      "epoch": 0.8382489909965849,
      "grad_norm": 42.24909591674805,
      "learning_rate": 9.788742182070884e-06,
      "loss": 1.2698,
      "step": 2025
    },
    {
      "epoch": 0.8485977439718514,
      "grad_norm": 30.07208824157715,
      "learning_rate": 9.785267546907575e-06,
      "loss": 1.0744,
      "step": 2050
    },
    {
      "epoch": 0.8589464969471179,
      "grad_norm": 40.065677642822266,
      "learning_rate": 9.781792911744267e-06,
      "loss": 1.0424,
      "step": 2075
    },
    {
      "epoch": 0.8692952499223844,
      "grad_norm": 33.75371170043945,
      "learning_rate": 9.77831827658096e-06,
      "loss": 1.1718,
      "step": 2100
    },
    {
      "epoch": 0.8796440028976509,
      "grad_norm": 29.951263427734375,
      "learning_rate": 9.774843641417652e-06,
      "loss": 1.0589,
      "step": 2125
    },
    {
      "epoch": 0.8899927558729173,
      "grad_norm": 48.64168930053711,
      "learning_rate": 9.771369006254343e-06,
      "loss": 1.253,
      "step": 2150
    },
    {
      "epoch": 0.9003415088481838,
      "grad_norm": 50.58803939819336,
      "learning_rate": 9.767894371091037e-06,
      "loss": 1.1886,
      "step": 2175
    },
    {
      "epoch": 0.9106902618234503,
      "grad_norm": 40.60319900512695,
      "learning_rate": 9.764419735927728e-06,
      "loss": 0.9825,
      "step": 2200
    },
    {
      "epoch": 0.9106902618234503,
      "eval_loss": 0.3742503523826599,
      "eval_runtime": 564.0293,
      "eval_samples_per_second": 1.927,
      "eval_steps_per_second": 0.964,
      "eval_wer": 0.22067775974025974,
      "step": 2200
    },
    {
      "epoch": 0.9210390147987167,
      "grad_norm": 42.79588317871094,
      "learning_rate": 9.760945100764422e-06,
      "loss": 1.0184,
      "step": 2225
    },
    {
      "epoch": 0.9313877677739832,
      "grad_norm": 59.941490173339844,
      "learning_rate": 9.757470465601113e-06,
      "loss": 1.2345,
      "step": 2250
    },
    {
      "epoch": 0.9417365207492497,
      "grad_norm": 45.79833984375,
      "learning_rate": 9.753995830437805e-06,
      "loss": 1.0212,
      "step": 2275
    },
    {
      "epoch": 0.9520852737245162,
      "grad_norm": 41.137176513671875,
      "learning_rate": 9.750521195274498e-06,
      "loss": 1.2013,
      "step": 2300
    },
    {
      "epoch": 0.9624340266997827,
      "grad_norm": 36.94132614135742,
      "learning_rate": 9.74704656011119e-06,
      "loss": 1.1972,
      "step": 2325
    },
    {
      "epoch": 0.9727827796750491,
      "grad_norm": 27.98811912536621,
      "learning_rate": 9.743571924947881e-06,
      "loss": 1.088,
      "step": 2350
    },
    {
      "epoch": 0.9831315326503156,
      "grad_norm": 20.012388229370117,
      "learning_rate": 9.740097289784574e-06,
      "loss": 0.9881,
      "step": 2375
    },
    {
      "epoch": 0.9934802856255821,
      "grad_norm": 40.9871940612793,
      "learning_rate": 9.736622654621266e-06,
      "loss": 1.0952,
      "step": 2400
    },
    {
      "epoch": 0.9934802856255821,
      "eval_loss": 0.36100757122039795,
      "eval_runtime": 567.8727,
      "eval_samples_per_second": 1.914,
      "eval_steps_per_second": 0.958,
      "eval_wer": 0.2234172077922078,
      "step": 2400
    },
    {
      "epoch": 1.0041395011901066,
      "grad_norm": 36.53849411010742,
      "learning_rate": 9.733148019457958e-06,
      "loss": 0.8935,
      "step": 2425
    },
    {
      "epoch": 1.014488254165373,
      "grad_norm": 34.85436248779297,
      "learning_rate": 9.72967338429465e-06,
      "loss": 0.6152,
      "step": 2450
    },
    {
      "epoch": 1.0248370071406396,
      "grad_norm": 22.534700393676758,
      "learning_rate": 9.726198749131342e-06,
      "loss": 0.5735,
      "step": 2475
    },
    {
      "epoch": 1.035185760115906,
      "grad_norm": 23.778539657592773,
      "learning_rate": 9.722724113968034e-06,
      "loss": 0.5496,
      "step": 2500
    },
    {
      "epoch": 1.0455345130911726,
      "grad_norm": 36.943050384521484,
      "learning_rate": 9.719249478804727e-06,
      "loss": 0.6067,
      "step": 2525
    },
    {
      "epoch": 1.055883266066439,
      "grad_norm": 21.048763275146484,
      "learning_rate": 9.715774843641419e-06,
      "loss": 0.5577,
      "step": 2550
    },
    {
      "epoch": 1.0662320190417054,
      "grad_norm": 31.97542953491211,
      "learning_rate": 9.71230020847811e-06,
      "loss": 0.5266,
      "step": 2575
    },
    {
      "epoch": 1.076580772016972,
      "grad_norm": 36.05984115600586,
      "learning_rate": 9.708825573314804e-06,
      "loss": 0.6001,
      "step": 2600
    },
    {
      "epoch": 1.076580772016972,
      "eval_loss": 0.388786643743515,
      "eval_runtime": 572.4362,
      "eval_samples_per_second": 1.899,
      "eval_steps_per_second": 0.95,
      "eval_wer": 0.24228896103896103,
      "step": 2600
    },
    {
      "epoch": 1.0869295249922384,
      "grad_norm": 35.275272369384766,
      "learning_rate": 9.705350938151495e-06,
      "loss": 0.6265,
      "step": 2625
    },
    {
      "epoch": 1.097278277967505,
      "grad_norm": 23.014320373535156,
      "learning_rate": 9.701876302988187e-06,
      "loss": 0.5507,
      "step": 2650
    },
    {
      "epoch": 1.1076270309427714,
      "grad_norm": 18.49266242980957,
      "learning_rate": 9.69840166782488e-06,
      "loss": 0.6563,
      "step": 2675
    },
    {
      "epoch": 1.117975783918038,
      "grad_norm": 22.754758834838867,
      "learning_rate": 9.694927032661572e-06,
      "loss": 0.633,
      "step": 2700
    },
    {
      "epoch": 1.1283245368933044,
      "grad_norm": 24.042457580566406,
      "learning_rate": 9.691452397498263e-06,
      "loss": 0.5714,
      "step": 2725
    },
    {
      "epoch": 1.1386732898685707,
      "grad_norm": 31.35757827758789,
      "learning_rate": 9.687977762334956e-06,
      "loss": 0.6269,
      "step": 2750
    },
    {
      "epoch": 1.1490220428438374,
      "grad_norm": 15.74731731414795,
      "learning_rate": 9.684503127171648e-06,
      "loss": 0.633,
      "step": 2775
    },
    {
      "epoch": 1.1593707958191037,
      "grad_norm": 25.903976440429688,
      "learning_rate": 9.68102849200834e-06,
      "loss": 0.5491,
      "step": 2800
    },
    {
      "epoch": 1.1593707958191037,
      "eval_loss": 0.37300390005111694,
      "eval_runtime": 573.7758,
      "eval_samples_per_second": 1.894,
      "eval_steps_per_second": 0.948,
      "eval_wer": 0.22646103896103897,
      "step": 2800
    },
    {
      "epoch": 1.1697195487943703,
      "grad_norm": 36.39768981933594,
      "learning_rate": 9.677553856845033e-06,
      "loss": 0.5838,
      "step": 2825
    },
    {
      "epoch": 1.1800683017696367,
      "grad_norm": 36.48637390136719,
      "learning_rate": 9.674079221681724e-06,
      "loss": 0.5071,
      "step": 2850
    },
    {
      "epoch": 1.1904170547449033,
      "grad_norm": 43.22209167480469,
      "learning_rate": 9.670604586518416e-06,
      "loss": 0.5702,
      "step": 2875
    },
    {
      "epoch": 1.2007658077201697,
      "grad_norm": 26.855955123901367,
      "learning_rate": 9.66712995135511e-06,
      "loss": 0.6014,
      "step": 2900
    },
    {
      "epoch": 1.211114560695436,
      "grad_norm": 28.16461181640625,
      "learning_rate": 9.663655316191801e-06,
      "loss": 0.6213,
      "step": 2925
    },
    {
      "epoch": 1.2214633136707027,
      "grad_norm": 19.875337600708008,
      "learning_rate": 9.660180681028492e-06,
      "loss": 0.5896,
      "step": 2950
    },
    {
      "epoch": 1.231812066645969,
      "grad_norm": 24.210161209106445,
      "learning_rate": 9.656706045865186e-06,
      "loss": 0.6079,
      "step": 2975
    },
    {
      "epoch": 1.2421608196212357,
      "grad_norm": 21.59490394592285,
      "learning_rate": 9.653231410701877e-06,
      "loss": 0.6732,
      "step": 3000
    },
    {
      "epoch": 1.2421608196212357,
      "eval_loss": 0.3701952397823334,
      "eval_runtime": 580.4786,
      "eval_samples_per_second": 1.873,
      "eval_steps_per_second": 0.937,
      "eval_wer": 0.2200689935064935,
      "step": 3000
    },
    {
      "epoch": 1.2421608196212357,
      "step": 3000,
      "total_flos": 2.449860020011008e+19,
      "train_loss": 1.494960307121277,
      "train_runtime": 14532.737,
      "train_samples_per_second": 39.895,
      "train_steps_per_second": 4.985
    }
  ],
  "logging_steps": 25,
  "max_steps": 72450,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.449860020011008e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}