|
{ |
|
"best_metric": 0.038467586040496826, |
|
"best_model_checkpoint": "./test_microsoft_dit/checkpoint-7924", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 9905, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005047955577990914, |
|
"grad_norm": 0.8398004174232483, |
|
"learning_rate": 2.9969712266532054e-05, |
|
"loss": 0.3087, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010095911155981827, |
|
"grad_norm": 1.147126317024231, |
|
"learning_rate": 2.993942453306411e-05, |
|
"loss": 0.202, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01514386673397274, |
|
"grad_norm": 1.1376692056655884, |
|
"learning_rate": 2.9909136799596164e-05, |
|
"loss": 0.1375, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020191822311963654, |
|
"grad_norm": 3.0222654342651367, |
|
"learning_rate": 2.987884906612822e-05, |
|
"loss": 0.1254, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02523977788995457, |
|
"grad_norm": 1.3963178396224976, |
|
"learning_rate": 2.9848561332660275e-05, |
|
"loss": 0.1105, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03028773346794548, |
|
"grad_norm": 0.741131067276001, |
|
"learning_rate": 2.9818273599192328e-05, |
|
"loss": 0.1022, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0353356890459364, |
|
"grad_norm": 1.0705397129058838, |
|
"learning_rate": 2.978798586572438e-05, |
|
"loss": 0.1027, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04038364462392731, |
|
"grad_norm": 1.127729892730713, |
|
"learning_rate": 2.9757698132256435e-05, |
|
"loss": 0.0979, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04543160020191822, |
|
"grad_norm": 0.888960063457489, |
|
"learning_rate": 2.9727410398788492e-05, |
|
"loss": 0.1024, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05047955577990914, |
|
"grad_norm": 0.9185839295387268, |
|
"learning_rate": 2.9697122665320545e-05, |
|
"loss": 0.1142, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05552751135790005, |
|
"grad_norm": 0.737047016620636, |
|
"learning_rate": 2.96668349318526e-05, |
|
"loss": 0.0956, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06057546693589096, |
|
"grad_norm": 0.7749747037887573, |
|
"learning_rate": 2.9636547198384656e-05, |
|
"loss": 0.0978, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06562342251388188, |
|
"grad_norm": 1.079695224761963, |
|
"learning_rate": 2.960625946491671e-05, |
|
"loss": 0.092, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0706713780918728, |
|
"grad_norm": 0.8315634727478027, |
|
"learning_rate": 2.9575971731448766e-05, |
|
"loss": 0.0975, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0757193336698637, |
|
"grad_norm": 0.7270865440368652, |
|
"learning_rate": 2.954568399798082e-05, |
|
"loss": 0.098, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08076728924785462, |
|
"grad_norm": 0.5786823630332947, |
|
"learning_rate": 2.9515396264512873e-05, |
|
"loss": 0.0846, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08581524482584553, |
|
"grad_norm": 0.7117003798484802, |
|
"learning_rate": 2.948510853104493e-05, |
|
"loss": 0.0905, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09086320040383644, |
|
"grad_norm": 0.6765159368515015, |
|
"learning_rate": 2.9454820797576983e-05, |
|
"loss": 0.0764, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09591115598182735, |
|
"grad_norm": 1.1397738456726074, |
|
"learning_rate": 2.9424533064109037e-05, |
|
"loss": 0.0882, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10095911155981828, |
|
"grad_norm": 0.6545870900154114, |
|
"learning_rate": 2.939424533064109e-05, |
|
"loss": 0.0991, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10600706713780919, |
|
"grad_norm": 0.8882391452789307, |
|
"learning_rate": 2.9363957597173144e-05, |
|
"loss": 0.0902, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1110550227158001, |
|
"grad_norm": 0.5973140001296997, |
|
"learning_rate": 2.93336698637052e-05, |
|
"loss": 0.0968, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11610297829379101, |
|
"grad_norm": 1.3215384483337402, |
|
"learning_rate": 2.9303382130237254e-05, |
|
"loss": 0.0901, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12115093387178193, |
|
"grad_norm": 0.6139042973518372, |
|
"learning_rate": 2.9273094396769307e-05, |
|
"loss": 0.0739, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12619888944977284, |
|
"grad_norm": 0.9095037579536438, |
|
"learning_rate": 2.9242806663301364e-05, |
|
"loss": 0.0907, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13124684502776376, |
|
"grad_norm": 1.0266954898834229, |
|
"learning_rate": 2.9212518929833418e-05, |
|
"loss": 0.0726, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.13629480060575466, |
|
"grad_norm": 0.734716534614563, |
|
"learning_rate": 2.9182231196365474e-05, |
|
"loss": 0.0891, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.1413427561837456, |
|
"grad_norm": 0.7633081674575806, |
|
"learning_rate": 2.9151943462897528e-05, |
|
"loss": 0.0747, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1463907117617365, |
|
"grad_norm": 0.8185615539550781, |
|
"learning_rate": 2.912165572942958e-05, |
|
"loss": 0.0815, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1514386673397274, |
|
"grad_norm": 1.2503191232681274, |
|
"learning_rate": 2.9091367995961638e-05, |
|
"loss": 0.0844, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15648662291771834, |
|
"grad_norm": 0.52531898021698, |
|
"learning_rate": 2.906108026249369e-05, |
|
"loss": 0.0863, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16153457849570924, |
|
"grad_norm": 0.8883135914802551, |
|
"learning_rate": 2.9030792529025745e-05, |
|
"loss": 0.0833, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16658253407370016, |
|
"grad_norm": 0.5173369646072388, |
|
"learning_rate": 2.90005047955578e-05, |
|
"loss": 0.0882, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17163048965169106, |
|
"grad_norm": 0.5770648717880249, |
|
"learning_rate": 2.8970217062089852e-05, |
|
"loss": 0.0814, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.17667844522968199, |
|
"grad_norm": 0.8828192949295044, |
|
"learning_rate": 2.893992932862191e-05, |
|
"loss": 0.0776, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18172640080767288, |
|
"grad_norm": 0.756236732006073, |
|
"learning_rate": 2.8909641595153962e-05, |
|
"loss": 0.0736, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1867743563856638, |
|
"grad_norm": 0.47730007767677307, |
|
"learning_rate": 2.887935386168602e-05, |
|
"loss": 0.0856, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1918223119636547, |
|
"grad_norm": 2.5338025093078613, |
|
"learning_rate": 2.8849066128218072e-05, |
|
"loss": 0.0879, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.19687026754164563, |
|
"grad_norm": 0.6218165159225464, |
|
"learning_rate": 2.8818778394750126e-05, |
|
"loss": 0.0724, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.20191822311963656, |
|
"grad_norm": 1.1621041297912598, |
|
"learning_rate": 2.8788490661282183e-05, |
|
"loss": 0.0742, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20696617869762746, |
|
"grad_norm": 0.8511998653411865, |
|
"learning_rate": 2.8758202927814236e-05, |
|
"loss": 0.0798, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21201413427561838, |
|
"grad_norm": 0.5848472118377686, |
|
"learning_rate": 2.8727915194346293e-05, |
|
"loss": 0.0834, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.21706208985360928, |
|
"grad_norm": 0.5747645497322083, |
|
"learning_rate": 2.8697627460878346e-05, |
|
"loss": 0.0745, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2221100454316002, |
|
"grad_norm": 1.058206558227539, |
|
"learning_rate": 2.86673397274104e-05, |
|
"loss": 0.0767, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2271580010095911, |
|
"grad_norm": 0.8267918825149536, |
|
"learning_rate": 2.8637051993942453e-05, |
|
"loss": 0.0893, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.23220595658758203, |
|
"grad_norm": 1.1392240524291992, |
|
"learning_rate": 2.8606764260474507e-05, |
|
"loss": 0.0833, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.23725391216557296, |
|
"grad_norm": 0.9474436044692993, |
|
"learning_rate": 2.8576476527006564e-05, |
|
"loss": 0.0896, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.24230186774356385, |
|
"grad_norm": 1.2880048751831055, |
|
"learning_rate": 2.8546188793538617e-05, |
|
"loss": 0.0924, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.24734982332155478, |
|
"grad_norm": 0.6342403888702393, |
|
"learning_rate": 2.851590106007067e-05, |
|
"loss": 0.0799, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2523977788995457, |
|
"grad_norm": 0.5780256986618042, |
|
"learning_rate": 2.8485613326602727e-05, |
|
"loss": 0.0798, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2574457344775366, |
|
"grad_norm": 0.7743504643440247, |
|
"learning_rate": 2.845532559313478e-05, |
|
"loss": 0.0681, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.26249369005552753, |
|
"grad_norm": 0.5771861672401428, |
|
"learning_rate": 2.8425037859666834e-05, |
|
"loss": 0.0753, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2675416456335184, |
|
"grad_norm": 0.6735575199127197, |
|
"learning_rate": 2.839475012619889e-05, |
|
"loss": 0.0773, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2725896012115093, |
|
"grad_norm": 0.7692667841911316, |
|
"learning_rate": 2.8364462392730945e-05, |
|
"loss": 0.0732, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.27763755678950025, |
|
"grad_norm": 0.5109196901321411, |
|
"learning_rate": 2.8334174659263e-05, |
|
"loss": 0.0859, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2826855123674912, |
|
"grad_norm": 0.726249098777771, |
|
"learning_rate": 2.8303886925795055e-05, |
|
"loss": 0.0801, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2877334679454821, |
|
"grad_norm": 0.8817322254180908, |
|
"learning_rate": 2.8273599192327108e-05, |
|
"loss": 0.0739, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.292781423523473, |
|
"grad_norm": 0.5081413984298706, |
|
"learning_rate": 2.8243311458859162e-05, |
|
"loss": 0.0727, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2978293791014639, |
|
"grad_norm": 0.9367203712463379, |
|
"learning_rate": 2.8213023725391215e-05, |
|
"loss": 0.0751, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3028773346794548, |
|
"grad_norm": 0.5382592678070068, |
|
"learning_rate": 2.8182735991923272e-05, |
|
"loss": 0.0756, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.30792529025744575, |
|
"grad_norm": 0.40977007150650024, |
|
"learning_rate": 2.8152448258455325e-05, |
|
"loss": 0.0714, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3129732458354367, |
|
"grad_norm": 0.6829769015312195, |
|
"learning_rate": 2.812216052498738e-05, |
|
"loss": 0.0809, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.31802120141342755, |
|
"grad_norm": 0.4805002212524414, |
|
"learning_rate": 2.8091872791519436e-05, |
|
"loss": 0.0789, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.32306915699141847, |
|
"grad_norm": 0.6755364537239075, |
|
"learning_rate": 2.806158505805149e-05, |
|
"loss": 0.0819, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3281171125694094, |
|
"grad_norm": 1.3035857677459717, |
|
"learning_rate": 2.8031297324583546e-05, |
|
"loss": 0.0861, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3331650681474003, |
|
"grad_norm": 0.7905831933021545, |
|
"learning_rate": 2.80010095911156e-05, |
|
"loss": 0.0739, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3382130237253912, |
|
"grad_norm": 0.8810652494430542, |
|
"learning_rate": 2.7970721857647653e-05, |
|
"loss": 0.0678, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3432609793033821, |
|
"grad_norm": 1.1220252513885498, |
|
"learning_rate": 2.794043412417971e-05, |
|
"loss": 0.07, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.34830893488137304, |
|
"grad_norm": 0.8519473075866699, |
|
"learning_rate": 2.7910146390711763e-05, |
|
"loss": 0.076, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.35335689045936397, |
|
"grad_norm": 0.49878937005996704, |
|
"learning_rate": 2.787985865724382e-05, |
|
"loss": 0.0787, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3584048460373549, |
|
"grad_norm": 1.4854084253311157, |
|
"learning_rate": 2.784957092377587e-05, |
|
"loss": 0.0872, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.36345280161534577, |
|
"grad_norm": 0.787535548210144, |
|
"learning_rate": 2.7819283190307924e-05, |
|
"loss": 0.0805, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3685007571933367, |
|
"grad_norm": 0.8322392106056213, |
|
"learning_rate": 2.778899545683998e-05, |
|
"loss": 0.0726, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3735487127713276, |
|
"grad_norm": 0.48470157384872437, |
|
"learning_rate": 2.7758707723372034e-05, |
|
"loss": 0.0673, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.37859666834931854, |
|
"grad_norm": 0.8375622034072876, |
|
"learning_rate": 2.772841998990409e-05, |
|
"loss": 0.0767, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3836446239273094, |
|
"grad_norm": 0.5212222337722778, |
|
"learning_rate": 2.7698132256436144e-05, |
|
"loss": 0.0737, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.38869257950530034, |
|
"grad_norm": 0.503209114074707, |
|
"learning_rate": 2.7667844522968198e-05, |
|
"loss": 0.0657, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.39374053508329127, |
|
"grad_norm": 0.4290629029273987, |
|
"learning_rate": 2.7637556789500254e-05, |
|
"loss": 0.0745, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3987884906612822, |
|
"grad_norm": 0.7535534501075745, |
|
"learning_rate": 2.7607269056032308e-05, |
|
"loss": 0.0702, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4038364462392731, |
|
"grad_norm": 0.67135089635849, |
|
"learning_rate": 2.757698132256436e-05, |
|
"loss": 0.0754, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.408884401817264, |
|
"grad_norm": 0.5307912230491638, |
|
"learning_rate": 2.7546693589096418e-05, |
|
"loss": 0.0717, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4139323573952549, |
|
"grad_norm": 0.46130767464637756, |
|
"learning_rate": 2.751640585562847e-05, |
|
"loss": 0.065, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.41898031297324584, |
|
"grad_norm": 1.2904905080795288, |
|
"learning_rate": 2.748611812216053e-05, |
|
"loss": 0.0818, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.42402826855123676, |
|
"grad_norm": 2.0480494499206543, |
|
"learning_rate": 2.745583038869258e-05, |
|
"loss": 0.085, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4290762241292277, |
|
"grad_norm": 0.5108308792114258, |
|
"learning_rate": 2.7425542655224632e-05, |
|
"loss": 0.0729, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.43412417970721856, |
|
"grad_norm": 0.6915296912193298, |
|
"learning_rate": 2.739525492175669e-05, |
|
"loss": 0.071, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4391721352852095, |
|
"grad_norm": 0.8100910782814026, |
|
"learning_rate": 2.7364967188288742e-05, |
|
"loss": 0.0667, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4442200908632004, |
|
"grad_norm": 0.626818835735321, |
|
"learning_rate": 2.73346794548208e-05, |
|
"loss": 0.0695, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.44926804644119134, |
|
"grad_norm": 0.673156201839447, |
|
"learning_rate": 2.7304391721352853e-05, |
|
"loss": 0.0793, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4543160020191822, |
|
"grad_norm": 0.5740798711776733, |
|
"learning_rate": 2.7274103987884906e-05, |
|
"loss": 0.0731, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.45936395759717313, |
|
"grad_norm": 0.744429349899292, |
|
"learning_rate": 2.7243816254416963e-05, |
|
"loss": 0.0743, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.46441191317516406, |
|
"grad_norm": 0.5837222933769226, |
|
"learning_rate": 2.7213528520949016e-05, |
|
"loss": 0.0747, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.469459868753155, |
|
"grad_norm": 0.500978410243988, |
|
"learning_rate": 2.7183240787481073e-05, |
|
"loss": 0.0753, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4745078243311459, |
|
"grad_norm": 1.0817604064941406, |
|
"learning_rate": 2.7152953054013127e-05, |
|
"loss": 0.0748, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4795557799091368, |
|
"grad_norm": 0.5821205377578735, |
|
"learning_rate": 2.712266532054518e-05, |
|
"loss": 0.0766, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4846037354871277, |
|
"grad_norm": 0.6120801568031311, |
|
"learning_rate": 2.7092377587077233e-05, |
|
"loss": 0.0827, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.48965169106511863, |
|
"grad_norm": 0.4379239082336426, |
|
"learning_rate": 2.7062089853609287e-05, |
|
"loss": 0.0664, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.49469964664310956, |
|
"grad_norm": 0.5472243428230286, |
|
"learning_rate": 2.7031802120141344e-05, |
|
"loss": 0.0767, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.49974760222110043, |
|
"grad_norm": 1.0190905332565308, |
|
"learning_rate": 2.7001514386673397e-05, |
|
"loss": 0.0739, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5047955577990914, |
|
"grad_norm": 0.7046610713005066, |
|
"learning_rate": 2.697122665320545e-05, |
|
"loss": 0.0685, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5098435133770823, |
|
"grad_norm": 0.5559498071670532, |
|
"learning_rate": 2.6940938919737507e-05, |
|
"loss": 0.0715, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5148914689550732, |
|
"grad_norm": 0.6298381686210632, |
|
"learning_rate": 2.691065118626956e-05, |
|
"loss": 0.0828, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5199394245330641, |
|
"grad_norm": 0.7023555636405945, |
|
"learning_rate": 2.6880363452801618e-05, |
|
"loss": 0.0809, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5249873801110551, |
|
"grad_norm": 0.6804683804512024, |
|
"learning_rate": 2.685007571933367e-05, |
|
"loss": 0.0739, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5300353356890459, |
|
"grad_norm": 0.7743015885353088, |
|
"learning_rate": 2.6819787985865725e-05, |
|
"loss": 0.0658, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5350832912670368, |
|
"grad_norm": 1.36810302734375, |
|
"learning_rate": 2.678950025239778e-05, |
|
"loss": 0.0747, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5401312468450278, |
|
"grad_norm": 0.47373896837234497, |
|
"learning_rate": 2.6759212518929835e-05, |
|
"loss": 0.0751, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5451792024230186, |
|
"grad_norm": 0.6654021143913269, |
|
"learning_rate": 2.6728924785461892e-05, |
|
"loss": 0.0683, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5502271580010096, |
|
"grad_norm": 1.0054854154586792, |
|
"learning_rate": 2.6698637051993942e-05, |
|
"loss": 0.0676, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5552751135790005, |
|
"grad_norm": 0.5544041395187378, |
|
"learning_rate": 2.6668349318525995e-05, |
|
"loss": 0.075, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5603230691569914, |
|
"grad_norm": 0.6919006109237671, |
|
"learning_rate": 2.6638061585058052e-05, |
|
"loss": 0.0709, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5653710247349824, |
|
"grad_norm": 0.5584747791290283, |
|
"learning_rate": 2.6607773851590106e-05, |
|
"loss": 0.0623, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5704189803129732, |
|
"grad_norm": 0.47064319252967834, |
|
"learning_rate": 2.657748611812216e-05, |
|
"loss": 0.0744, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5754669358909642, |
|
"grad_norm": 0.5119986534118652, |
|
"learning_rate": 2.6547198384654216e-05, |
|
"loss": 0.0795, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5805148914689551, |
|
"grad_norm": 0.9572923183441162, |
|
"learning_rate": 2.651691065118627e-05, |
|
"loss": 0.073, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.585562847046946, |
|
"grad_norm": 0.5633489489555359, |
|
"learning_rate": 2.6486622917718326e-05, |
|
"loss": 0.0637, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5906108026249369, |
|
"grad_norm": 1.1218105554580688, |
|
"learning_rate": 2.645633518425038e-05, |
|
"loss": 0.0695, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.5956587582029278, |
|
"grad_norm": 0.6655285954475403, |
|
"learning_rate": 2.6426047450782433e-05, |
|
"loss": 0.0774, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6007067137809188, |
|
"grad_norm": 1.3088024854660034, |
|
"learning_rate": 2.639575971731449e-05, |
|
"loss": 0.0748, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6057546693589096, |
|
"grad_norm": 0.9868513941764832, |
|
"learning_rate": 2.6365471983846543e-05, |
|
"loss": 0.0695, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6108026249369005, |
|
"grad_norm": 0.5922626852989197, |
|
"learning_rate": 2.63351842503786e-05, |
|
"loss": 0.0678, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6158505805148915, |
|
"grad_norm": 0.6839954257011414, |
|
"learning_rate": 2.630489651691065e-05, |
|
"loss": 0.0693, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6208985360928824, |
|
"grad_norm": 0.6755519509315491, |
|
"learning_rate": 2.6274608783442704e-05, |
|
"loss": 0.0742, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6259464916708734, |
|
"grad_norm": 0.4968509078025818, |
|
"learning_rate": 2.624432104997476e-05, |
|
"loss": 0.0615, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6309944472488642, |
|
"grad_norm": 1.1036404371261597, |
|
"learning_rate": 2.6214033316506814e-05, |
|
"loss": 0.0727, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6360424028268551, |
|
"grad_norm": 0.810405969619751, |
|
"learning_rate": 2.618374558303887e-05, |
|
"loss": 0.072, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6410903584048461, |
|
"grad_norm": 0.730140209197998, |
|
"learning_rate": 2.6153457849570924e-05, |
|
"loss": 0.0652, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6461383139828369, |
|
"grad_norm": 1.1645480394363403, |
|
"learning_rate": 2.6123170116102978e-05, |
|
"loss": 0.0716, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6511862695608278, |
|
"grad_norm": 0.8481037020683289, |
|
"learning_rate": 2.6092882382635034e-05, |
|
"loss": 0.0737, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6562342251388188, |
|
"grad_norm": 0.5972946882247925, |
|
"learning_rate": 2.6062594649167088e-05, |
|
"loss": 0.0704, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6612821807168097, |
|
"grad_norm": 0.6405556201934814, |
|
"learning_rate": 2.6032306915699145e-05, |
|
"loss": 0.0628, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6663301362948006, |
|
"grad_norm": 0.8645715117454529, |
|
"learning_rate": 2.6002019182231198e-05, |
|
"loss": 0.0742, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6713780918727915, |
|
"grad_norm": 1.4211089611053467, |
|
"learning_rate": 2.597173144876325e-05, |
|
"loss": 0.0731, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6764260474507824, |
|
"grad_norm": 0.8079481720924377, |
|
"learning_rate": 2.594144371529531e-05, |
|
"loss": 0.0732, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6814740030287734, |
|
"grad_norm": 0.6517273783683777, |
|
"learning_rate": 2.591115598182736e-05, |
|
"loss": 0.0688, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.6865219586067642, |
|
"grad_norm": 1.2093323469161987, |
|
"learning_rate": 2.5880868248359415e-05, |
|
"loss": 0.0729, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.6915699141847552, |
|
"grad_norm": 0.6432307362556458, |
|
"learning_rate": 2.585058051489147e-05, |
|
"loss": 0.076, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.6966178697627461, |
|
"grad_norm": 0.5220794677734375, |
|
"learning_rate": 2.5820292781423522e-05, |
|
"loss": 0.0702, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.701665825340737, |
|
"grad_norm": 1.0983613729476929, |
|
"learning_rate": 2.579000504795558e-05, |
|
"loss": 0.0676, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7067137809187279, |
|
"grad_norm": 0.859348475933075, |
|
"learning_rate": 2.5759717314487633e-05, |
|
"loss": 0.0615, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7117617364967188, |
|
"grad_norm": 0.7912864685058594, |
|
"learning_rate": 2.572942958101969e-05, |
|
"loss": 0.0681, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7168096920747098, |
|
"grad_norm": 0.6189167499542236, |
|
"learning_rate": 2.5699141847551743e-05, |
|
"loss": 0.0682, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7218576476527007, |
|
"grad_norm": 0.5456287860870361, |
|
"learning_rate": 2.5668854114083796e-05, |
|
"loss": 0.0591, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7269056032306915, |
|
"grad_norm": 0.485055148601532, |
|
"learning_rate": 2.5638566380615853e-05, |
|
"loss": 0.0729, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7319535588086825, |
|
"grad_norm": 0.46423906087875366, |
|
"learning_rate": 2.5608278647147907e-05, |
|
"loss": 0.0646, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7370015143866734, |
|
"grad_norm": 0.5944865345954895, |
|
"learning_rate": 2.557799091367996e-05, |
|
"loss": 0.0696, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7420494699646644, |
|
"grad_norm": 0.794015645980835, |
|
"learning_rate": 2.5547703180212014e-05, |
|
"loss": 0.0671, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7470974255426552, |
|
"grad_norm": 0.6759900450706482, |
|
"learning_rate": 2.5517415446744067e-05, |
|
"loss": 0.074, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7521453811206461, |
|
"grad_norm": 0.6719480156898499, |
|
"learning_rate": 2.5487127713276124e-05, |
|
"loss": 0.0708, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7571933366986371, |
|
"grad_norm": 0.7934426665306091, |
|
"learning_rate": 2.5456839979808177e-05, |
|
"loss": 0.0664, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.762241292276628, |
|
"grad_norm": 1.4169378280639648, |
|
"learning_rate": 2.542655224634023e-05, |
|
"loss": 0.0726, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7672892478546188, |
|
"grad_norm": 0.5849716067314148, |
|
"learning_rate": 2.5396264512872288e-05, |
|
"loss": 0.0709, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7723372034326098, |
|
"grad_norm": 0.8471559286117554, |
|
"learning_rate": 2.536597677940434e-05, |
|
"loss": 0.0764, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7773851590106007, |
|
"grad_norm": 0.7494149804115295, |
|
"learning_rate": 2.5335689045936398e-05, |
|
"loss": 0.0629, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.7824331145885917, |
|
"grad_norm": 0.7659397721290588, |
|
"learning_rate": 2.530540131246845e-05, |
|
"loss": 0.061, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.7874810701665825, |
|
"grad_norm": 0.8505954146385193, |
|
"learning_rate": 2.5275113579000505e-05, |
|
"loss": 0.0693, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.7925290257445734, |
|
"grad_norm": 0.8126624226570129, |
|
"learning_rate": 2.524482584553256e-05, |
|
"loss": 0.0738, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.7975769813225644, |
|
"grad_norm": 0.9350792765617371, |
|
"learning_rate": 2.5214538112064615e-05, |
|
"loss": 0.0821, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8026249369005553, |
|
"grad_norm": 1.075035810470581, |
|
"learning_rate": 2.5184250378596672e-05, |
|
"loss": 0.0758, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8076728924785462, |
|
"grad_norm": 0.6885321736335754, |
|
"learning_rate": 2.5153962645128722e-05, |
|
"loss": 0.0641, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8127208480565371, |
|
"grad_norm": 0.7702226042747498, |
|
"learning_rate": 2.5123674911660775e-05, |
|
"loss": 0.0642, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.817768803634528, |
|
"grad_norm": 0.9809953570365906, |
|
"learning_rate": 2.5093387178192832e-05, |
|
"loss": 0.0759, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.822816759212519, |
|
"grad_norm": 0.5996444225311279, |
|
"learning_rate": 2.5063099444724886e-05, |
|
"loss": 0.0686, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8278647147905098, |
|
"grad_norm": 0.5003983378410339, |
|
"learning_rate": 2.5032811711256942e-05, |
|
"loss": 0.0697, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8329126703685008, |
|
"grad_norm": 0.7024896740913391, |
|
"learning_rate": 2.5002523977788996e-05, |
|
"loss": 0.0699, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8379606259464917, |
|
"grad_norm": 0.5384397506713867, |
|
"learning_rate": 2.497223624432105e-05, |
|
"loss": 0.0684, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8430085815244825, |
|
"grad_norm": 1.176849126815796, |
|
"learning_rate": 2.4941948510853106e-05, |
|
"loss": 0.065, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8480565371024735, |
|
"grad_norm": 0.7623859643936157, |
|
"learning_rate": 2.491166077738516e-05, |
|
"loss": 0.0676, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8531044926804644, |
|
"grad_norm": 0.8817411065101624, |
|
"learning_rate": 2.4881373043917216e-05, |
|
"loss": 0.0712, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8581524482584554, |
|
"grad_norm": 0.7471240162849426, |
|
"learning_rate": 2.485108531044927e-05, |
|
"loss": 0.0719, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8632004038364463, |
|
"grad_norm": 0.9217013120651245, |
|
"learning_rate": 2.4820797576981323e-05, |
|
"loss": 0.0758, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8682483594144371, |
|
"grad_norm": 0.4985320568084717, |
|
"learning_rate": 2.479050984351338e-05, |
|
"loss": 0.075, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8732963149924281, |
|
"grad_norm": 0.47823965549468994, |
|
"learning_rate": 2.476022211004543e-05, |
|
"loss": 0.0576, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.878344270570419, |
|
"grad_norm": 0.5073914527893066, |
|
"learning_rate": 2.4729934376577487e-05, |
|
"loss": 0.0619, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.8833922261484098, |
|
"grad_norm": 0.6744971871376038, |
|
"learning_rate": 2.469964664310954e-05, |
|
"loss": 0.0674, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.8884401817264008, |
|
"grad_norm": 0.7287705540657043, |
|
"learning_rate": 2.4669358909641594e-05, |
|
"loss": 0.0705, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.8934881373043917, |
|
"grad_norm": 0.6387834548950195, |
|
"learning_rate": 2.463907117617365e-05, |
|
"loss": 0.0736, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.8985360928823827, |
|
"grad_norm": 0.8428398370742798, |
|
"learning_rate": 2.4608783442705704e-05, |
|
"loss": 0.0741, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9035840484603735, |
|
"grad_norm": 0.6455987691879272, |
|
"learning_rate": 2.4578495709237758e-05, |
|
"loss": 0.0639, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9086320040383644, |
|
"grad_norm": 0.6735292673110962, |
|
"learning_rate": 2.4548207975769815e-05, |
|
"loss": 0.0795, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9136799596163554, |
|
"grad_norm": 0.6157563924789429, |
|
"learning_rate": 2.4517920242301868e-05, |
|
"loss": 0.0699, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9187279151943463, |
|
"grad_norm": 0.7483514547348022, |
|
"learning_rate": 2.4487632508833925e-05, |
|
"loss": 0.0681, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9237758707723372, |
|
"grad_norm": 0.5686767101287842, |
|
"learning_rate": 2.4457344775365978e-05, |
|
"loss": 0.0713, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9288238263503281, |
|
"grad_norm": 0.352909654378891, |
|
"learning_rate": 2.4427057041898032e-05, |
|
"loss": 0.0641, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.933871781928319, |
|
"grad_norm": 0.6095912456512451, |
|
"learning_rate": 2.439676930843009e-05, |
|
"loss": 0.0794, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.93891973750631, |
|
"grad_norm": 0.3929665684700012, |
|
"learning_rate": 2.436648157496214e-05, |
|
"loss": 0.0672, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9439676930843008, |
|
"grad_norm": 0.22026501595973969, |
|
"learning_rate": 2.4336193841494195e-05, |
|
"loss": 0.0699, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9490156486622918, |
|
"grad_norm": 0.5952547788619995, |
|
"learning_rate": 2.430590610802625e-05, |
|
"loss": 0.0733, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9540636042402827, |
|
"grad_norm": 0.7297592163085938, |
|
"learning_rate": 2.4275618374558302e-05, |
|
"loss": 0.0725, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9591115598182736, |
|
"grad_norm": 0.35177797079086304, |
|
"learning_rate": 2.424533064109036e-05, |
|
"loss": 0.0651, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9641595153962645, |
|
"grad_norm": 0.6706666350364685, |
|
"learning_rate": 2.4215042907622413e-05, |
|
"loss": 0.0737, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9692074709742554, |
|
"grad_norm": 0.7155650854110718, |
|
"learning_rate": 2.418475517415447e-05, |
|
"loss": 0.074, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.9742554265522464, |
|
"grad_norm": 0.5200046300888062, |
|
"learning_rate": 2.4154467440686523e-05, |
|
"loss": 0.0706, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.9793033821302373, |
|
"grad_norm": 0.46796679496765137, |
|
"learning_rate": 2.4124179707218576e-05, |
|
"loss": 0.0592, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.9843513377082281, |
|
"grad_norm": 0.5713896751403809, |
|
"learning_rate": 2.4093891973750633e-05, |
|
"loss": 0.0586, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9893992932862191, |
|
"grad_norm": 0.9147453308105469, |
|
"learning_rate": 2.4063604240282687e-05, |
|
"loss": 0.0848, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.99444724886421, |
|
"grad_norm": 1.1067036390304565, |
|
"learning_rate": 2.4033316506814744e-05, |
|
"loss": 0.07, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9994952044422009, |
|
"grad_norm": 0.5658775568008423, |
|
"learning_rate": 2.4003028773346797e-05, |
|
"loss": 0.0594, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.9705180789481339, |
|
"eval_loss": 0.04397369921207428, |
|
"eval_runtime": 594.1594, |
|
"eval_samples_per_second": 347.149, |
|
"eval_steps_per_second": 2.713, |
|
"step": 1981 |
|
}, |
|
{ |
|
"epoch": 1.0045431600201917, |
|
"grad_norm": 0.6783074736595154, |
|
"learning_rate": 2.3972741039878847e-05, |
|
"loss": 0.0783, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0095911155981827, |
|
"grad_norm": 0.5741100311279297, |
|
"learning_rate": 2.3942453306410904e-05, |
|
"loss": 0.0612, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0146390711761737, |
|
"grad_norm": 0.8516017198562622, |
|
"learning_rate": 2.3912165572942957e-05, |
|
"loss": 0.0654, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.0196870267541647, |
|
"grad_norm": 0.48648303747177124, |
|
"learning_rate": 2.3881877839475014e-05, |
|
"loss": 0.0659, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0247349823321554, |
|
"grad_norm": 0.48170068860054016, |
|
"learning_rate": 2.3851590106007068e-05, |
|
"loss": 0.0687, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0297829379101464, |
|
"grad_norm": 0.8060422539710999, |
|
"learning_rate": 2.382130237253912e-05, |
|
"loss": 0.0741, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.0348308934881374, |
|
"grad_norm": 0.3721982538700104, |
|
"learning_rate": 2.3791014639071178e-05, |
|
"loss": 0.0643, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.0398788490661282, |
|
"grad_norm": 0.9289938807487488, |
|
"learning_rate": 2.376072690560323e-05, |
|
"loss": 0.0678, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0449268046441191, |
|
"grad_norm": 0.7339480519294739, |
|
"learning_rate": 2.3730439172135288e-05, |
|
"loss": 0.065, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.0499747602221101, |
|
"grad_norm": 0.5676091313362122, |
|
"learning_rate": 2.370015143866734e-05, |
|
"loss": 0.0665, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.0550227158001009, |
|
"grad_norm": 1.0972354412078857, |
|
"learning_rate": 2.3669863705199395e-05, |
|
"loss": 0.0664, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.0600706713780919, |
|
"grad_norm": 1.11980402469635, |
|
"learning_rate": 2.3639575971731452e-05, |
|
"loss": 0.0742, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.0651186269560828, |
|
"grad_norm": 0.6586318016052246, |
|
"learning_rate": 2.3609288238263502e-05, |
|
"loss": 0.0755, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0701665825340738, |
|
"grad_norm": 0.6912874579429626, |
|
"learning_rate": 2.3579000504795555e-05, |
|
"loss": 0.0722, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.0752145381120646, |
|
"grad_norm": 0.5603944659233093, |
|
"learning_rate": 2.3548712771327612e-05, |
|
"loss": 0.0636, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.0802624936900556, |
|
"grad_norm": 0.7324510216712952, |
|
"learning_rate": 2.3518425037859666e-05, |
|
"loss": 0.0697, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.0853104492680465, |
|
"grad_norm": 0.6833095550537109, |
|
"learning_rate": 2.3488137304391723e-05, |
|
"loss": 0.0678, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.0903584048460373, |
|
"grad_norm": 0.49107661843299866, |
|
"learning_rate": 2.3457849570923776e-05, |
|
"loss": 0.0608, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.0954063604240283, |
|
"grad_norm": 0.541980504989624, |
|
"learning_rate": 2.342756183745583e-05, |
|
"loss": 0.0645, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1004543160020193, |
|
"grad_norm": 0.487343966960907, |
|
"learning_rate": 2.3397274103987886e-05, |
|
"loss": 0.0573, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.10550227158001, |
|
"grad_norm": 0.3503382205963135, |
|
"learning_rate": 2.336698637051994e-05, |
|
"loss": 0.0753, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.110550227158001, |
|
"grad_norm": 0.750566840171814, |
|
"learning_rate": 2.3336698637051997e-05, |
|
"loss": 0.0703, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.115598182735992, |
|
"grad_norm": 1.1437385082244873, |
|
"learning_rate": 2.330641090358405e-05, |
|
"loss": 0.0706, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.1206461383139827, |
|
"grad_norm": 0.4508492648601532, |
|
"learning_rate": 2.3276123170116103e-05, |
|
"loss": 0.064, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1256940938919737, |
|
"grad_norm": 1.0053447484970093, |
|
"learning_rate": 2.324583543664816e-05, |
|
"loss": 0.0595, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.1307420494699647, |
|
"grad_norm": 0.5974487662315369, |
|
"learning_rate": 2.321554770318021e-05, |
|
"loss": 0.0613, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1357900050479555, |
|
"grad_norm": 0.48302361369132996, |
|
"learning_rate": 2.3185259969712267e-05, |
|
"loss": 0.0553, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1408379606259464, |
|
"grad_norm": 0.7124462127685547, |
|
"learning_rate": 2.315497223624432e-05, |
|
"loss": 0.0628, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1458859162039374, |
|
"grad_norm": 0.8712441921234131, |
|
"learning_rate": 2.3124684502776374e-05, |
|
"loss": 0.066, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.1509338717819284, |
|
"grad_norm": 0.7473580241203308, |
|
"learning_rate": 2.309439676930843e-05, |
|
"loss": 0.0687, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1559818273599192, |
|
"grad_norm": 0.8231186866760254, |
|
"learning_rate": 2.3064109035840484e-05, |
|
"loss": 0.0686, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.1610297829379101, |
|
"grad_norm": 0.5205137729644775, |
|
"learning_rate": 2.303382130237254e-05, |
|
"loss": 0.0668, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.1660777385159011, |
|
"grad_norm": 0.5173012614250183, |
|
"learning_rate": 2.3003533568904595e-05, |
|
"loss": 0.0664, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.171125694093892, |
|
"grad_norm": 0.6976504325866699, |
|
"learning_rate": 2.2973245835436648e-05, |
|
"loss": 0.067, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.1761736496718829, |
|
"grad_norm": 0.7795687317848206, |
|
"learning_rate": 2.2942958101968705e-05, |
|
"loss": 0.0591, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.1812216052498739, |
|
"grad_norm": 0.35292479395866394, |
|
"learning_rate": 2.291267036850076e-05, |
|
"loss": 0.0721, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.1862695608278648, |
|
"grad_norm": 1.548770546913147, |
|
"learning_rate": 2.2882382635032815e-05, |
|
"loss": 0.0608, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.1913175164058556, |
|
"grad_norm": 0.521295964717865, |
|
"learning_rate": 2.285209490156487e-05, |
|
"loss": 0.0735, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.1963654719838466, |
|
"grad_norm": 0.6001691818237305, |
|
"learning_rate": 2.282180716809692e-05, |
|
"loss": 0.0646, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.2014134275618376, |
|
"grad_norm": 0.9061608910560608, |
|
"learning_rate": 2.2791519434628976e-05, |
|
"loss": 0.0598, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.2064613831398283, |
|
"grad_norm": 0.6509453654289246, |
|
"learning_rate": 2.276123170116103e-05, |
|
"loss": 0.0591, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.2115093387178193, |
|
"grad_norm": 0.4685826301574707, |
|
"learning_rate": 2.2730943967693086e-05, |
|
"loss": 0.0675, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.2165572942958103, |
|
"grad_norm": 0.4527621865272522, |
|
"learning_rate": 2.270065623422514e-05, |
|
"loss": 0.0635, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.221605249873801, |
|
"grad_norm": 0.46990010142326355, |
|
"learning_rate": 2.2670368500757193e-05, |
|
"loss": 0.0609, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.226653205451792, |
|
"grad_norm": 0.7978981137275696, |
|
"learning_rate": 2.264008076728925e-05, |
|
"loss": 0.0682, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.231701161029783, |
|
"grad_norm": 0.5001055598258972, |
|
"learning_rate": 2.2609793033821303e-05, |
|
"loss": 0.0657, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.2367491166077738, |
|
"grad_norm": 0.7271714806556702, |
|
"learning_rate": 2.2579505300353356e-05, |
|
"loss": 0.0627, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.2417970721857647, |
|
"grad_norm": 0.3601450026035309, |
|
"learning_rate": 2.2549217566885413e-05, |
|
"loss": 0.0649, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.2468450277637557, |
|
"grad_norm": 0.6351629495620728, |
|
"learning_rate": 2.2518929833417467e-05, |
|
"loss": 0.0619, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.2518929833417465, |
|
"grad_norm": 0.8523517847061157, |
|
"learning_rate": 2.2488642099949524e-05, |
|
"loss": 0.078, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.2569409389197375, |
|
"grad_norm": 1.0878459215164185, |
|
"learning_rate": 2.2458354366481577e-05, |
|
"loss": 0.0636, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.2619888944977284, |
|
"grad_norm": 0.6811727285385132, |
|
"learning_rate": 2.2428066633013627e-05, |
|
"loss": 0.0703, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.2670368500757192, |
|
"grad_norm": 0.6043427586555481, |
|
"learning_rate": 2.2397778899545684e-05, |
|
"loss": 0.0587, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.2720848056537102, |
|
"grad_norm": 0.6673144102096558, |
|
"learning_rate": 2.2367491166077737e-05, |
|
"loss": 0.0675, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.2771327612317012, |
|
"grad_norm": 0.3510701358318329, |
|
"learning_rate": 2.2337203432609794e-05, |
|
"loss": 0.069, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.2821807168096921, |
|
"grad_norm": 0.302438884973526, |
|
"learning_rate": 2.2306915699141848e-05, |
|
"loss": 0.0609, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.2872286723876831, |
|
"grad_norm": 0.8073706030845642, |
|
"learning_rate": 2.22766279656739e-05, |
|
"loss": 0.076, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.2922766279656739, |
|
"grad_norm": 0.7314086556434631, |
|
"learning_rate": 2.2246340232205958e-05, |
|
"loss": 0.0676, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.2973245835436649, |
|
"grad_norm": 0.6998431086540222, |
|
"learning_rate": 2.221605249873801e-05, |
|
"loss": 0.0594, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.3023725391216558, |
|
"grad_norm": 0.9340649843215942, |
|
"learning_rate": 2.2185764765270068e-05, |
|
"loss": 0.0601, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.3074204946996466, |
|
"grad_norm": 0.5486651062965393, |
|
"learning_rate": 2.215547703180212e-05, |
|
"loss": 0.0752, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3124684502776376, |
|
"grad_norm": 0.3997117280960083, |
|
"learning_rate": 2.2125189298334175e-05, |
|
"loss": 0.0669, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3175164058556286, |
|
"grad_norm": 0.6159607172012329, |
|
"learning_rate": 2.2094901564866232e-05, |
|
"loss": 0.0646, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.3225643614336193, |
|
"grad_norm": 1.0720511674880981, |
|
"learning_rate": 2.2064613831398285e-05, |
|
"loss": 0.0697, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.3276123170116103, |
|
"grad_norm": 0.6496064066886902, |
|
"learning_rate": 2.203432609793034e-05, |
|
"loss": 0.0642, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.3326602725896013, |
|
"grad_norm": 0.5649464726448059, |
|
"learning_rate": 2.2004038364462392e-05, |
|
"loss": 0.0596, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.337708228167592, |
|
"grad_norm": 0.5532758235931396, |
|
"learning_rate": 2.1973750630994446e-05, |
|
"loss": 0.0651, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.342756183745583, |
|
"grad_norm": 0.4955766797065735, |
|
"learning_rate": 2.1943462897526503e-05, |
|
"loss": 0.0661, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.347804139323574, |
|
"grad_norm": 0.5403378009796143, |
|
"learning_rate": 2.1913175164058556e-05, |
|
"loss": 0.068, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.3528520949015648, |
|
"grad_norm": 0.8987810015678406, |
|
"learning_rate": 2.1882887430590613e-05, |
|
"loss": 0.0551, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.3579000504795558, |
|
"grad_norm": 0.5531570911407471, |
|
"learning_rate": 2.1852599697122666e-05, |
|
"loss": 0.0554, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.3629480060575467, |
|
"grad_norm": 0.8810332417488098, |
|
"learning_rate": 2.182231196365472e-05, |
|
"loss": 0.0683, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.3679959616355375, |
|
"grad_norm": 0.8977289199829102, |
|
"learning_rate": 2.1792024230186777e-05, |
|
"loss": 0.0682, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.3730439172135285, |
|
"grad_norm": 0.6664491295814514, |
|
"learning_rate": 2.176173649671883e-05, |
|
"loss": 0.0652, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.3780918727915195, |
|
"grad_norm": 0.7725427150726318, |
|
"learning_rate": 2.1731448763250883e-05, |
|
"loss": 0.0693, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.3831398283695102, |
|
"grad_norm": 1.149824857711792, |
|
"learning_rate": 2.170116102978294e-05, |
|
"loss": 0.0697, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.3881877839475012, |
|
"grad_norm": 0.8231659531593323, |
|
"learning_rate": 2.167087329631499e-05, |
|
"loss": 0.0586, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.3932357395254922, |
|
"grad_norm": 0.5706813335418701, |
|
"learning_rate": 2.1640585562847047e-05, |
|
"loss": 0.0648, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.3982836951034832, |
|
"grad_norm": 0.4602285623550415, |
|
"learning_rate": 2.16102978293791e-05, |
|
"loss": 0.0642, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.4033316506814741, |
|
"grad_norm": 0.5022104978561401, |
|
"learning_rate": 2.1580010095911154e-05, |
|
"loss": 0.0582, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.408379606259465, |
|
"grad_norm": 0.3675612211227417, |
|
"learning_rate": 2.154972236244321e-05, |
|
"loss": 0.0685, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.4134275618374559, |
|
"grad_norm": 0.5692434906959534, |
|
"learning_rate": 2.1519434628975264e-05, |
|
"loss": 0.0625, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.4184755174154469, |
|
"grad_norm": 0.44433364272117615, |
|
"learning_rate": 2.148914689550732e-05, |
|
"loss": 0.0683, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.4235234729934376, |
|
"grad_norm": 0.5225184559822083, |
|
"learning_rate": 2.1458859162039375e-05, |
|
"loss": 0.0676, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 1.125475287437439, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 0.0641, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.4336193841494196, |
|
"grad_norm": 0.6783428192138672, |
|
"learning_rate": 2.1398283695103485e-05, |
|
"loss": 0.0735, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4386673397274103, |
|
"grad_norm": 0.6056823134422302, |
|
"learning_rate": 2.136799596163554e-05, |
|
"loss": 0.0607, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4437152953054013, |
|
"grad_norm": 0.7588714361190796, |
|
"learning_rate": 2.1337708228167595e-05, |
|
"loss": 0.0638, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.4487632508833923, |
|
"grad_norm": 0.5353738069534302, |
|
"learning_rate": 2.130742049469965e-05, |
|
"loss": 0.0628, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.453811206461383, |
|
"grad_norm": 0.3690322935581207, |
|
"learning_rate": 2.12771327612317e-05, |
|
"loss": 0.055, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.458859162039374, |
|
"grad_norm": 0.5556847453117371, |
|
"learning_rate": 2.1246845027763756e-05, |
|
"loss": 0.0672, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.463907117617365, |
|
"grad_norm": 0.5658410787582397, |
|
"learning_rate": 2.121655729429581e-05, |
|
"loss": 0.0634, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.4689550731953558, |
|
"grad_norm": 1.1000596284866333, |
|
"learning_rate": 2.1186269560827866e-05, |
|
"loss": 0.0648, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.4740030287733468, |
|
"grad_norm": 0.5739458799362183, |
|
"learning_rate": 2.115598182735992e-05, |
|
"loss": 0.0622, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.4790509843513377, |
|
"grad_norm": 0.9371837377548218, |
|
"learning_rate": 2.1125694093891973e-05, |
|
"loss": 0.067, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.4840989399293285, |
|
"grad_norm": 0.5997252464294434, |
|
"learning_rate": 2.109540636042403e-05, |
|
"loss": 0.0665, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.4891468955073195, |
|
"grad_norm": 0.6729413866996765, |
|
"learning_rate": 2.1065118626956083e-05, |
|
"loss": 0.0576, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.4941948510853105, |
|
"grad_norm": 0.796592652797699, |
|
"learning_rate": 2.103483089348814e-05, |
|
"loss": 0.0671, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.4992428066633012, |
|
"grad_norm": 0.7947612404823303, |
|
"learning_rate": 2.1004543160020193e-05, |
|
"loss": 0.0701, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.5042907622412924, |
|
"grad_norm": 0.7790849208831787, |
|
"learning_rate": 2.0974255426552247e-05, |
|
"loss": 0.065, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.5093387178192832, |
|
"grad_norm": 0.5330706238746643, |
|
"learning_rate": 2.0943967693084304e-05, |
|
"loss": 0.0587, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.514386673397274, |
|
"grad_norm": 1.0482598543167114, |
|
"learning_rate": 2.0913679959616357e-05, |
|
"loss": 0.0696, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.5194346289752652, |
|
"grad_norm": 0.46928080916404724, |
|
"learning_rate": 2.088339222614841e-05, |
|
"loss": 0.0668, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.524482584553256, |
|
"grad_norm": 1.0525529384613037, |
|
"learning_rate": 2.0853104492680464e-05, |
|
"loss": 0.0664, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.529530540131247, |
|
"grad_norm": 0.43941500782966614, |
|
"learning_rate": 2.0822816759212517e-05, |
|
"loss": 0.0642, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.5345784957092379, |
|
"grad_norm": 0.6985353231430054, |
|
"learning_rate": 2.0792529025744574e-05, |
|
"loss": 0.068, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5396264512872286, |
|
"grad_norm": 0.6110888123512268, |
|
"learning_rate": 2.0762241292276628e-05, |
|
"loss": 0.0639, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.5446744068652196, |
|
"grad_norm": 0.8250141739845276, |
|
"learning_rate": 2.073195355880868e-05, |
|
"loss": 0.0614, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.5497223624432106, |
|
"grad_norm": 0.4882888197898865, |
|
"learning_rate": 2.0701665825340738e-05, |
|
"loss": 0.066, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.5547703180212014, |
|
"grad_norm": 0.38679155707359314, |
|
"learning_rate": 2.067137809187279e-05, |
|
"loss": 0.0684, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.5598182735991923, |
|
"grad_norm": 0.6574121117591858, |
|
"learning_rate": 2.0641090358404848e-05, |
|
"loss": 0.0666, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.5648662291771833, |
|
"grad_norm": 0.48571038246154785, |
|
"learning_rate": 2.0610802624936902e-05, |
|
"loss": 0.0646, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.569914184755174, |
|
"grad_norm": 0.8285214304924011, |
|
"learning_rate": 2.0580514891468955e-05, |
|
"loss": 0.0634, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.574962140333165, |
|
"grad_norm": 0.5619475245475769, |
|
"learning_rate": 2.0550227158001012e-05, |
|
"loss": 0.0665, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.580010095911156, |
|
"grad_norm": 0.47569337487220764, |
|
"learning_rate": 2.0519939424533065e-05, |
|
"loss": 0.0661, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.5850580514891468, |
|
"grad_norm": 0.8858407139778137, |
|
"learning_rate": 2.048965169106512e-05, |
|
"loss": 0.0696, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.5901060070671378, |
|
"grad_norm": 0.5578007698059082, |
|
"learning_rate": 2.0459363957597172e-05, |
|
"loss": 0.0547, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.5951539626451288, |
|
"grad_norm": 0.6875492334365845, |
|
"learning_rate": 2.0429076224129226e-05, |
|
"loss": 0.0608, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.6002019182231195, |
|
"grad_norm": 0.5009766221046448, |
|
"learning_rate": 2.0398788490661283e-05, |
|
"loss": 0.0684, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.6052498738011105, |
|
"grad_norm": 0.7467596530914307, |
|
"learning_rate": 2.0368500757193336e-05, |
|
"loss": 0.0654, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.6102978293791015, |
|
"grad_norm": 0.5688017010688782, |
|
"learning_rate": 2.0338213023725393e-05, |
|
"loss": 0.0594, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.6153457849570922, |
|
"grad_norm": 0.9353786110877991, |
|
"learning_rate": 2.0307925290257446e-05, |
|
"loss": 0.0685, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.6203937405350834, |
|
"grad_norm": 0.5310063362121582, |
|
"learning_rate": 2.02776375567895e-05, |
|
"loss": 0.0597, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.6254416961130742, |
|
"grad_norm": 1.107693076133728, |
|
"learning_rate": 2.0247349823321557e-05, |
|
"loss": 0.0722, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.630489651691065, |
|
"grad_norm": 0.688391923904419, |
|
"learning_rate": 2.021706208985361e-05, |
|
"loss": 0.0719, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.6355376072690562, |
|
"grad_norm": 0.4255257546901703, |
|
"learning_rate": 2.0186774356385667e-05, |
|
"loss": 0.0638, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.640585562847047, |
|
"grad_norm": 0.6049216389656067, |
|
"learning_rate": 2.015648662291772e-05, |
|
"loss": 0.0555, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.645633518425038, |
|
"grad_norm": 0.6898351311683655, |
|
"learning_rate": 2.012619888944977e-05, |
|
"loss": 0.0599, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.650681474003029, |
|
"grad_norm": 0.6150475144386292, |
|
"learning_rate": 2.0095911155981827e-05, |
|
"loss": 0.0664, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.6557294295810197, |
|
"grad_norm": 0.5084889531135559, |
|
"learning_rate": 2.006562342251388e-05, |
|
"loss": 0.0574, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.6607773851590106, |
|
"grad_norm": 0.9478010535240173, |
|
"learning_rate": 2.0035335689045938e-05, |
|
"loss": 0.0619, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.6658253407370016, |
|
"grad_norm": 1.1725986003875732, |
|
"learning_rate": 2.000504795557799e-05, |
|
"loss": 0.0672, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.6708732963149924, |
|
"grad_norm": 0.8932427763938904, |
|
"learning_rate": 1.9974760222110044e-05, |
|
"loss": 0.0604, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.6759212518929834, |
|
"grad_norm": 0.4670265316963196, |
|
"learning_rate": 1.99444724886421e-05, |
|
"loss": 0.0658, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.6809692074709743, |
|
"grad_norm": 0.518844485282898, |
|
"learning_rate": 1.9914184755174155e-05, |
|
"loss": 0.068, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.686017163048965, |
|
"grad_norm": 0.7717642784118652, |
|
"learning_rate": 1.988389702170621e-05, |
|
"loss": 0.0594, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.691065118626956, |
|
"grad_norm": 0.9715004563331604, |
|
"learning_rate": 1.9853609288238265e-05, |
|
"loss": 0.0651, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.696113074204947, |
|
"grad_norm": 0.7362111210823059, |
|
"learning_rate": 1.982332155477032e-05, |
|
"loss": 0.0664, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.7011610297829378, |
|
"grad_norm": 0.480751633644104, |
|
"learning_rate": 1.9793033821302375e-05, |
|
"loss": 0.0609, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.7062089853609288, |
|
"grad_norm": 0.31802135705947876, |
|
"learning_rate": 1.976274608783443e-05, |
|
"loss": 0.0658, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.7112569409389198, |
|
"grad_norm": 0.5285906195640564, |
|
"learning_rate": 1.973245835436648e-05, |
|
"loss": 0.0606, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.7163048965169105, |
|
"grad_norm": 0.7230745553970337, |
|
"learning_rate": 1.9702170620898536e-05, |
|
"loss": 0.0618, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.7213528520949015, |
|
"grad_norm": 0.566842257976532, |
|
"learning_rate": 1.967188288743059e-05, |
|
"loss": 0.0623, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.7264008076728925, |
|
"grad_norm": 0.9110565781593323, |
|
"learning_rate": 1.9641595153962646e-05, |
|
"loss": 0.0712, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.7314487632508833, |
|
"grad_norm": 0.5621252059936523, |
|
"learning_rate": 1.96113074204947e-05, |
|
"loss": 0.0624, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.7364967188288745, |
|
"grad_norm": 0.6153441667556763, |
|
"learning_rate": 1.9581019687026753e-05, |
|
"loss": 0.0679, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.7415446744068652, |
|
"grad_norm": 0.7521117925643921, |
|
"learning_rate": 1.955073195355881e-05, |
|
"loss": 0.073, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.746592629984856, |
|
"grad_norm": 0.7781336307525635, |
|
"learning_rate": 1.9520444220090863e-05, |
|
"loss": 0.0576, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.7516405855628472, |
|
"grad_norm": 0.5981038808822632, |
|
"learning_rate": 1.949015648662292e-05, |
|
"loss": 0.0558, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.756688541140838, |
|
"grad_norm": 0.5716273188591003, |
|
"learning_rate": 1.9459868753154973e-05, |
|
"loss": 0.0615, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.761736496718829, |
|
"grad_norm": 1.0969016551971436, |
|
"learning_rate": 1.9429581019687027e-05, |
|
"loss": 0.0695, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.76678445229682, |
|
"grad_norm": 0.4081050157546997, |
|
"learning_rate": 1.9399293286219084e-05, |
|
"loss": 0.0569, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.7718324078748107, |
|
"grad_norm": 0.6996564269065857, |
|
"learning_rate": 1.9369005552751137e-05, |
|
"loss": 0.0615, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.7768803634528016, |
|
"grad_norm": 0.7040839791297913, |
|
"learning_rate": 1.933871781928319e-05, |
|
"loss": 0.0609, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.7819283190307926, |
|
"grad_norm": 0.6955099105834961, |
|
"learning_rate": 1.9308430085815244e-05, |
|
"loss": 0.0596, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.7869762746087834, |
|
"grad_norm": 0.49400514364242554, |
|
"learning_rate": 1.9278142352347298e-05, |
|
"loss": 0.0531, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.7920242301867744, |
|
"grad_norm": 0.6069557666778564, |
|
"learning_rate": 1.9247854618879354e-05, |
|
"loss": 0.0663, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.7970721857647654, |
|
"grad_norm": 0.859195351600647, |
|
"learning_rate": 1.9217566885411408e-05, |
|
"loss": 0.0539, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.802120141342756, |
|
"grad_norm": 0.8939780592918396, |
|
"learning_rate": 1.9187279151943465e-05, |
|
"loss": 0.0668, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.807168096920747, |
|
"grad_norm": 0.7258803248405457, |
|
"learning_rate": 1.9156991418475518e-05, |
|
"loss": 0.0585, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.812216052498738, |
|
"grad_norm": 0.38900288939476013, |
|
"learning_rate": 1.912670368500757e-05, |
|
"loss": 0.0686, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.8172640080767288, |
|
"grad_norm": 0.38506415486335754, |
|
"learning_rate": 1.909641595153963e-05, |
|
"loss": 0.0625, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.8223119636547198, |
|
"grad_norm": 0.5235381722450256, |
|
"learning_rate": 1.9066128218071682e-05, |
|
"loss": 0.0597, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.8273599192327108, |
|
"grad_norm": 0.4835253357887268, |
|
"learning_rate": 1.903584048460374e-05, |
|
"loss": 0.0667, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.8324078748107016, |
|
"grad_norm": 0.6338971257209778, |
|
"learning_rate": 1.9005552751135792e-05, |
|
"loss": 0.0635, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.8374558303886925, |
|
"grad_norm": 1.0663739442825317, |
|
"learning_rate": 1.8975265017667846e-05, |
|
"loss": 0.0744, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.8425037859666835, |
|
"grad_norm": 0.6655123829841614, |
|
"learning_rate": 1.89449772841999e-05, |
|
"loss": 0.0654, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.8475517415446743, |
|
"grad_norm": 0.582611083984375, |
|
"learning_rate": 1.8914689550731952e-05, |
|
"loss": 0.0661, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.8525996971226655, |
|
"grad_norm": 0.6533240079879761, |
|
"learning_rate": 1.888440181726401e-05, |
|
"loss": 0.0613, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.8576476527006562, |
|
"grad_norm": 0.4978090524673462, |
|
"learning_rate": 1.8854114083796063e-05, |
|
"loss": 0.0627, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.862695608278647, |
|
"grad_norm": 0.7043678164482117, |
|
"learning_rate": 1.8823826350328116e-05, |
|
"loss": 0.0578, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.8677435638566382, |
|
"grad_norm": 0.7941015362739563, |
|
"learning_rate": 1.8793538616860173e-05, |
|
"loss": 0.0622, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.872791519434629, |
|
"grad_norm": 0.4428146183490753, |
|
"learning_rate": 1.8763250883392226e-05, |
|
"loss": 0.0613, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.87783947501262, |
|
"grad_norm": 0.6554248929023743, |
|
"learning_rate": 1.873296314992428e-05, |
|
"loss": 0.0643, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.882887430590611, |
|
"grad_norm": 0.48168087005615234, |
|
"learning_rate": 1.8702675416456337e-05, |
|
"loss": 0.055, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.8879353861686017, |
|
"grad_norm": 0.509777307510376, |
|
"learning_rate": 1.867238768298839e-05, |
|
"loss": 0.058, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.8929833417465927, |
|
"grad_norm": 0.5132505893707275, |
|
"learning_rate": 1.8642099949520447e-05, |
|
"loss": 0.0623, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.8980312973245836, |
|
"grad_norm": 0.7474920749664307, |
|
"learning_rate": 1.86118122160525e-05, |
|
"loss": 0.0489, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.9030792529025744, |
|
"grad_norm": 1.0404279232025146, |
|
"learning_rate": 1.8581524482584554e-05, |
|
"loss": 0.0687, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.9081272084805654, |
|
"grad_norm": 0.6796401143074036, |
|
"learning_rate": 1.8551236749116607e-05, |
|
"loss": 0.0679, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.9131751640585564, |
|
"grad_norm": 0.9071604609489441, |
|
"learning_rate": 1.852094901564866e-05, |
|
"loss": 0.0725, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.9182231196365471, |
|
"grad_norm": 0.7023878693580627, |
|
"learning_rate": 1.8490661282180718e-05, |
|
"loss": 0.0702, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.923271075214538, |
|
"grad_norm": 0.7312602996826172, |
|
"learning_rate": 1.846037354871277e-05, |
|
"loss": 0.0532, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.928319030792529, |
|
"grad_norm": 0.6224806904792786, |
|
"learning_rate": 1.8430085815244825e-05, |
|
"loss": 0.0638, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.9333669863705198, |
|
"grad_norm": 0.7255429029464722, |
|
"learning_rate": 1.839979808177688e-05, |
|
"loss": 0.0641, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.9384149419485108, |
|
"grad_norm": 0.584086000919342, |
|
"learning_rate": 1.8369510348308935e-05, |
|
"loss": 0.0692, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.9434628975265018, |
|
"grad_norm": 0.4826408326625824, |
|
"learning_rate": 1.833922261484099e-05, |
|
"loss": 0.0627, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.9485108531044926, |
|
"grad_norm": 0.5803766846656799, |
|
"learning_rate": 1.8308934881373045e-05, |
|
"loss": 0.0635, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.9535588086824835, |
|
"grad_norm": 0.7855948209762573, |
|
"learning_rate": 1.82786471479051e-05, |
|
"loss": 0.0659, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.9586067642604745, |
|
"grad_norm": 0.5980962514877319, |
|
"learning_rate": 1.8248359414437155e-05, |
|
"loss": 0.0651, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.9636547198384653, |
|
"grad_norm": 0.6440220475196838, |
|
"learning_rate": 1.821807168096921e-05, |
|
"loss": 0.0639, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.9687026754164565, |
|
"grad_norm": 0.7104585766792297, |
|
"learning_rate": 1.8187783947501262e-05, |
|
"loss": 0.056, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.9737506309944473, |
|
"grad_norm": 0.7219833731651306, |
|
"learning_rate": 1.8157496214033316e-05, |
|
"loss": 0.0574, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.978798586572438, |
|
"grad_norm": 0.5478711724281311, |
|
"learning_rate": 1.812720848056537e-05, |
|
"loss": 0.0657, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.9838465421504292, |
|
"grad_norm": 0.6501402854919434, |
|
"learning_rate": 1.8096920747097426e-05, |
|
"loss": 0.0641, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.98889449772842, |
|
"grad_norm": 0.7231020331382751, |
|
"learning_rate": 1.806663301362948e-05, |
|
"loss": 0.0692, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.993942453306411, |
|
"grad_norm": 0.6480854749679565, |
|
"learning_rate": 1.8036345280161536e-05, |
|
"loss": 0.0632, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.998990408884402, |
|
"grad_norm": 0.4803590774536133, |
|
"learning_rate": 1.800605754669359e-05, |
|
"loss": 0.0678, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9705180789481339, |
|
"eval_loss": 0.0446692518889904, |
|
"eval_runtime": 584.4017, |
|
"eval_samples_per_second": 352.946, |
|
"eval_steps_per_second": 2.758, |
|
"step": 3962 |
|
}, |
|
{ |
|
"epoch": 2.0040383644623927, |
|
"grad_norm": 0.680855393409729, |
|
"learning_rate": 1.7975769813225643e-05, |
|
"loss": 0.0567, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.0090863200403835, |
|
"grad_norm": 0.47991836071014404, |
|
"learning_rate": 1.79454820797577e-05, |
|
"loss": 0.0562, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.0141342756183747, |
|
"grad_norm": 0.8615912199020386, |
|
"learning_rate": 1.7915194346289753e-05, |
|
"loss": 0.0679, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.0191822311963654, |
|
"grad_norm": 0.5970327258110046, |
|
"learning_rate": 1.7884906612821807e-05, |
|
"loss": 0.053, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.024230186774356, |
|
"grad_norm": 0.5402255654335022, |
|
"learning_rate": 1.7854618879353864e-05, |
|
"loss": 0.0574, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.0292781423523474, |
|
"grad_norm": 0.5014840364456177, |
|
"learning_rate": 1.7824331145885917e-05, |
|
"loss": 0.0649, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.034326097930338, |
|
"grad_norm": 0.7147154808044434, |
|
"learning_rate": 1.779404341241797e-05, |
|
"loss": 0.0687, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.0393740535083293, |
|
"grad_norm": 0.5346552729606628, |
|
"learning_rate": 1.7763755678950024e-05, |
|
"loss": 0.0638, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.04442200908632, |
|
"grad_norm": 0.5596599578857422, |
|
"learning_rate": 1.7733467945482078e-05, |
|
"loss": 0.0669, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.049469964664311, |
|
"grad_norm": 0.40591198205947876, |
|
"learning_rate": 1.7703180212014134e-05, |
|
"loss": 0.0564, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.054517920242302, |
|
"grad_norm": 0.609337568283081, |
|
"learning_rate": 1.7672892478546188e-05, |
|
"loss": 0.0576, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.059565875820293, |
|
"grad_norm": 0.5424002408981323, |
|
"learning_rate": 1.7642604745078245e-05, |
|
"loss": 0.0585, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.0646138313982836, |
|
"grad_norm": 0.9868631362915039, |
|
"learning_rate": 1.7612317011610298e-05, |
|
"loss": 0.0684, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.069661786976275, |
|
"grad_norm": 0.6492929458618164, |
|
"learning_rate": 1.758202927814235e-05, |
|
"loss": 0.0638, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.0747097425542655, |
|
"grad_norm": 0.7837685346603394, |
|
"learning_rate": 1.755174154467441e-05, |
|
"loss": 0.0675, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.0797576981322563, |
|
"grad_norm": 0.5961639881134033, |
|
"learning_rate": 1.7521453811206462e-05, |
|
"loss": 0.0575, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.0848056537102475, |
|
"grad_norm": 0.4114825427532196, |
|
"learning_rate": 1.749116607773852e-05, |
|
"loss": 0.0659, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.0898536092882383, |
|
"grad_norm": 0.4567316174507141, |
|
"learning_rate": 1.7460878344270572e-05, |
|
"loss": 0.0661, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.094901564866229, |
|
"grad_norm": 0.6321776509284973, |
|
"learning_rate": 1.7430590610802626e-05, |
|
"loss": 0.066, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.0999495204442202, |
|
"grad_norm": 0.8911116719245911, |
|
"learning_rate": 1.740030287733468e-05, |
|
"loss": 0.0585, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.104997476022211, |
|
"grad_norm": 0.4896914064884186, |
|
"learning_rate": 1.7370015143866733e-05, |
|
"loss": 0.0612, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.1100454316002017, |
|
"grad_norm": 0.7571251392364502, |
|
"learning_rate": 1.733972741039879e-05, |
|
"loss": 0.0563, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.115093387178193, |
|
"grad_norm": 0.9115099310874939, |
|
"learning_rate": 1.7309439676930843e-05, |
|
"loss": 0.0698, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.1201413427561837, |
|
"grad_norm": 0.5267325639724731, |
|
"learning_rate": 1.7279151943462896e-05, |
|
"loss": 0.0604, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.1251892983341745, |
|
"grad_norm": 0.6659255623817444, |
|
"learning_rate": 1.7248864209994953e-05, |
|
"loss": 0.0627, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.1302372539121657, |
|
"grad_norm": 0.89178466796875, |
|
"learning_rate": 1.7218576476527007e-05, |
|
"loss": 0.0552, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.1352852094901564, |
|
"grad_norm": 0.4615127742290497, |
|
"learning_rate": 1.7188288743059063e-05, |
|
"loss": 0.0557, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.1403331650681476, |
|
"grad_norm": 0.6602596044540405, |
|
"learning_rate": 1.7158001009591117e-05, |
|
"loss": 0.0548, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.1453811206461384, |
|
"grad_norm": 0.7081389427185059, |
|
"learning_rate": 1.712771327612317e-05, |
|
"loss": 0.0606, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.150429076224129, |
|
"grad_norm": 0.5817338824272156, |
|
"learning_rate": 1.7097425542655227e-05, |
|
"loss": 0.0606, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.1554770318021204, |
|
"grad_norm": 0.4401390254497528, |
|
"learning_rate": 1.706713780918728e-05, |
|
"loss": 0.0607, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.160524987380111, |
|
"grad_norm": 1.0127087831497192, |
|
"learning_rate": 1.7036850075719337e-05, |
|
"loss": 0.0615, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.165572942958102, |
|
"grad_norm": 0.5774319171905518, |
|
"learning_rate": 1.7006562342251387e-05, |
|
"loss": 0.0525, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.170620898536093, |
|
"grad_norm": 0.47623270750045776, |
|
"learning_rate": 1.697627460878344e-05, |
|
"loss": 0.0591, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.175668854114084, |
|
"grad_norm": 0.7083358764648438, |
|
"learning_rate": 1.6945986875315498e-05, |
|
"loss": 0.0631, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.1807168096920746, |
|
"grad_norm": 0.6057601571083069, |
|
"learning_rate": 1.691569914184755e-05, |
|
"loss": 0.0595, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.185764765270066, |
|
"grad_norm": 0.8947880864143372, |
|
"learning_rate": 1.6885411408379605e-05, |
|
"loss": 0.0666, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.1908127208480566, |
|
"grad_norm": 0.6460204720497131, |
|
"learning_rate": 1.685512367491166e-05, |
|
"loss": 0.0669, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.1958606764260473, |
|
"grad_norm": 0.9029686450958252, |
|
"learning_rate": 1.6824835941443715e-05, |
|
"loss": 0.0607, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.2009086320040385, |
|
"grad_norm": 0.5201438665390015, |
|
"learning_rate": 1.6794548207975772e-05, |
|
"loss": 0.0514, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.2059565875820293, |
|
"grad_norm": 0.39414748549461365, |
|
"learning_rate": 1.6764260474507825e-05, |
|
"loss": 0.0581, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.21100454316002, |
|
"grad_norm": 0.642257034778595, |
|
"learning_rate": 1.673397274103988e-05, |
|
"loss": 0.0611, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.2160524987380112, |
|
"grad_norm": 0.7225739359855652, |
|
"learning_rate": 1.6703685007571935e-05, |
|
"loss": 0.0569, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.221100454316002, |
|
"grad_norm": 0.6948502659797668, |
|
"learning_rate": 1.667339727410399e-05, |
|
"loss": 0.0652, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.2261484098939928, |
|
"grad_norm": 0.5755937695503235, |
|
"learning_rate": 1.6643109540636042e-05, |
|
"loss": 0.0566, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.231196365471984, |
|
"grad_norm": 0.4249815046787262, |
|
"learning_rate": 1.6612821807168096e-05, |
|
"loss": 0.0642, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.2362443210499747, |
|
"grad_norm": 0.5442089438438416, |
|
"learning_rate": 1.658253407370015e-05, |
|
"loss": 0.0685, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.2412922766279655, |
|
"grad_norm": 0.8074495792388916, |
|
"learning_rate": 1.6552246340232206e-05, |
|
"loss": 0.0558, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.2463402322059567, |
|
"grad_norm": 0.8810071349143982, |
|
"learning_rate": 1.652195860676426e-05, |
|
"loss": 0.0685, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.2513881877839474, |
|
"grad_norm": 0.5399377942085266, |
|
"learning_rate": 1.6491670873296316e-05, |
|
"loss": 0.0607, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.256436143361938, |
|
"grad_norm": 0.7178535461425781, |
|
"learning_rate": 1.646138313982837e-05, |
|
"loss": 0.0504, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.2614840989399294, |
|
"grad_norm": 0.4272046983242035, |
|
"learning_rate": 1.6431095406360423e-05, |
|
"loss": 0.0583, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.26653205451792, |
|
"grad_norm": 0.6807524561882019, |
|
"learning_rate": 1.640080767289248e-05, |
|
"loss": 0.0639, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.271580010095911, |
|
"grad_norm": 0.5895000100135803, |
|
"learning_rate": 1.6370519939424534e-05, |
|
"loss": 0.0675, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.276627965673902, |
|
"grad_norm": 0.6640876531600952, |
|
"learning_rate": 1.634023220595659e-05, |
|
"loss": 0.0603, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.281675921251893, |
|
"grad_norm": 0.4367890954017639, |
|
"learning_rate": 1.6309944472488644e-05, |
|
"loss": 0.0517, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.2867238768298837, |
|
"grad_norm": 1.082713007926941, |
|
"learning_rate": 1.6279656739020697e-05, |
|
"loss": 0.0524, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.291771832407875, |
|
"grad_norm": 0.5186300277709961, |
|
"learning_rate": 1.624936900555275e-05, |
|
"loss": 0.0566, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.2968197879858656, |
|
"grad_norm": 1.2778280973434448, |
|
"learning_rate": 1.6219081272084804e-05, |
|
"loss": 0.0531, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.301867743563857, |
|
"grad_norm": 0.46757417917251587, |
|
"learning_rate": 1.618879353861686e-05, |
|
"loss": 0.0637, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.3069156991418476, |
|
"grad_norm": 0.6333388686180115, |
|
"learning_rate": 1.6158505805148914e-05, |
|
"loss": 0.0557, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.3119636547198383, |
|
"grad_norm": 0.4005846381187439, |
|
"learning_rate": 1.6128218071680968e-05, |
|
"loss": 0.0512, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.3170116102978295, |
|
"grad_norm": 1.0479962825775146, |
|
"learning_rate": 1.6097930338213025e-05, |
|
"loss": 0.0639, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.3220595658758203, |
|
"grad_norm": 1.1324669122695923, |
|
"learning_rate": 1.6067642604745078e-05, |
|
"loss": 0.0642, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.327107521453811, |
|
"grad_norm": 0.827215313911438, |
|
"learning_rate": 1.6037354871277135e-05, |
|
"loss": 0.0654, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.3321554770318023, |
|
"grad_norm": 0.8228656649589539, |
|
"learning_rate": 1.600706713780919e-05, |
|
"loss": 0.0648, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.337203432609793, |
|
"grad_norm": 0.5897762775421143, |
|
"learning_rate": 1.5976779404341242e-05, |
|
"loss": 0.0546, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.342251388187784, |
|
"grad_norm": 0.6223641633987427, |
|
"learning_rate": 1.59464916708733e-05, |
|
"loss": 0.0712, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.347299343765775, |
|
"grad_norm": 0.5593187808990479, |
|
"learning_rate": 1.5916203937405352e-05, |
|
"loss": 0.0707, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.3523472993437657, |
|
"grad_norm": 0.9349427223205566, |
|
"learning_rate": 1.5885916203937406e-05, |
|
"loss": 0.0581, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.3573952549217565, |
|
"grad_norm": 0.47101134061813354, |
|
"learning_rate": 1.585562847046946e-05, |
|
"loss": 0.0688, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.3624432104997477, |
|
"grad_norm": 0.5073738098144531, |
|
"learning_rate": 1.5825340737001513e-05, |
|
"loss": 0.0678, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.3674911660777385, |
|
"grad_norm": 0.5324171781539917, |
|
"learning_rate": 1.579505300353357e-05, |
|
"loss": 0.0614, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.3725391216557297, |
|
"grad_norm": 0.662965714931488, |
|
"learning_rate": 1.5764765270065623e-05, |
|
"loss": 0.0507, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.3775870772337204, |
|
"grad_norm": 0.6482782959938049, |
|
"learning_rate": 1.5734477536597676e-05, |
|
"loss": 0.0537, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.382635032811711, |
|
"grad_norm": 1.0039052963256836, |
|
"learning_rate": 1.5704189803129733e-05, |
|
"loss": 0.059, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.3876829883897024, |
|
"grad_norm": 0.8546132445335388, |
|
"learning_rate": 1.5673902069661787e-05, |
|
"loss": 0.0691, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.392730943967693, |
|
"grad_norm": 0.4903261363506317, |
|
"learning_rate": 1.5643614336193843e-05, |
|
"loss": 0.0535, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.397778899545684, |
|
"grad_norm": 0.8538033962249756, |
|
"learning_rate": 1.5613326602725897e-05, |
|
"loss": 0.0616, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.402826855123675, |
|
"grad_norm": 0.7978336215019226, |
|
"learning_rate": 1.558303886925795e-05, |
|
"loss": 0.0613, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.407874810701666, |
|
"grad_norm": 0.6981778740882874, |
|
"learning_rate": 1.5552751135790007e-05, |
|
"loss": 0.0646, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.4129227662796566, |
|
"grad_norm": 0.8517895936965942, |
|
"learning_rate": 1.552246340232206e-05, |
|
"loss": 0.0705, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.417970721857648, |
|
"grad_norm": 0.4087599813938141, |
|
"learning_rate": 1.5492175668854117e-05, |
|
"loss": 0.0638, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.4230186774356386, |
|
"grad_norm": 0.3779948651790619, |
|
"learning_rate": 1.5461887935386168e-05, |
|
"loss": 0.0524, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.4280666330136293, |
|
"grad_norm": 0.42263171076774597, |
|
"learning_rate": 1.543160020191822e-05, |
|
"loss": 0.0623, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.4331145885916206, |
|
"grad_norm": 0.5812351107597351, |
|
"learning_rate": 1.5401312468450278e-05, |
|
"loss": 0.0573, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.4381625441696113, |
|
"grad_norm": 0.6073315143585205, |
|
"learning_rate": 1.537102473498233e-05, |
|
"loss": 0.057, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.443210499747602, |
|
"grad_norm": 0.8706870079040527, |
|
"learning_rate": 1.5340737001514388e-05, |
|
"loss": 0.0606, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.4482584553255933, |
|
"grad_norm": 0.9355966448783875, |
|
"learning_rate": 1.531044926804644e-05, |
|
"loss": 0.0563, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.453306410903584, |
|
"grad_norm": 0.6352431774139404, |
|
"learning_rate": 1.5280161534578495e-05, |
|
"loss": 0.0537, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.458354366481575, |
|
"grad_norm": 0.5970965623855591, |
|
"learning_rate": 1.524987380111055e-05, |
|
"loss": 0.0663, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.463402322059566, |
|
"grad_norm": 0.40907353162765503, |
|
"learning_rate": 1.5219586067642605e-05, |
|
"loss": 0.0502, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.4684502776375568, |
|
"grad_norm": 0.5130166411399841, |
|
"learning_rate": 1.518929833417466e-05, |
|
"loss": 0.0538, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.4734982332155475, |
|
"grad_norm": 0.9824861288070679, |
|
"learning_rate": 1.5159010600706716e-05, |
|
"loss": 0.0518, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.4785461887935387, |
|
"grad_norm": 0.6424157023429871, |
|
"learning_rate": 1.512872286723877e-05, |
|
"loss": 0.0599, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.4835941443715295, |
|
"grad_norm": 0.8797338008880615, |
|
"learning_rate": 1.5098435133770824e-05, |
|
"loss": 0.0534, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.4886420999495202, |
|
"grad_norm": 1.0275185108184814, |
|
"learning_rate": 1.5068147400302876e-05, |
|
"loss": 0.063, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.4936900555275114, |
|
"grad_norm": 0.6370276808738708, |
|
"learning_rate": 1.5037859666834931e-05, |
|
"loss": 0.0584, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.498738011105502, |
|
"grad_norm": 0.5083595514297485, |
|
"learning_rate": 1.5007571933366986e-05, |
|
"loss": 0.0635, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.503785966683493, |
|
"grad_norm": 0.8423396348953247, |
|
"learning_rate": 1.4977284199899041e-05, |
|
"loss": 0.0593, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.508833922261484, |
|
"grad_norm": 0.6133778691291809, |
|
"learning_rate": 1.4946996466431095e-05, |
|
"loss": 0.0652, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.513881877839475, |
|
"grad_norm": 0.5626839995384216, |
|
"learning_rate": 1.491670873296315e-05, |
|
"loss": 0.061, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.5189298334174657, |
|
"grad_norm": 0.6379786729812622, |
|
"learning_rate": 1.4886420999495205e-05, |
|
"loss": 0.0583, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.523977788995457, |
|
"grad_norm": 0.39859360456466675, |
|
"learning_rate": 1.485613326602726e-05, |
|
"loss": 0.057, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.5290257445734476, |
|
"grad_norm": 0.4674101173877716, |
|
"learning_rate": 1.4825845532559315e-05, |
|
"loss": 0.0584, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.5340737001514384, |
|
"grad_norm": 0.6018111705780029, |
|
"learning_rate": 1.4795557799091367e-05, |
|
"loss": 0.0606, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.5391216557294296, |
|
"grad_norm": 0.4932622015476227, |
|
"learning_rate": 1.4765270065623422e-05, |
|
"loss": 0.0551, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.5441696113074204, |
|
"grad_norm": 0.5576731562614441, |
|
"learning_rate": 1.4734982332155477e-05, |
|
"loss": 0.0562, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.5492175668854116, |
|
"grad_norm": 0.5910426378250122, |
|
"learning_rate": 1.4704694598687533e-05, |
|
"loss": 0.0632, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.5542655224634023, |
|
"grad_norm": 0.42830216884613037, |
|
"learning_rate": 1.4674406865219586e-05, |
|
"loss": 0.0589, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.559313478041393, |
|
"grad_norm": 0.657305896282196, |
|
"learning_rate": 1.4644119131751641e-05, |
|
"loss": 0.0666, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.5643614336193843, |
|
"grad_norm": 0.5498583912849426, |
|
"learning_rate": 1.4613831398283696e-05, |
|
"loss": 0.0677, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.569409389197375, |
|
"grad_norm": 1.5641086101531982, |
|
"learning_rate": 1.458354366481575e-05, |
|
"loss": 0.0618, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.5744573447753663, |
|
"grad_norm": 0.576878011226654, |
|
"learning_rate": 1.4553255931347805e-05, |
|
"loss": 0.0596, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.579505300353357, |
|
"grad_norm": 0.6855084896087646, |
|
"learning_rate": 1.4522968197879858e-05, |
|
"loss": 0.0684, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.5845532559313478, |
|
"grad_norm": 0.46760818362236023, |
|
"learning_rate": 1.4492680464411913e-05, |
|
"loss": 0.0628, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.589601211509339, |
|
"grad_norm": 0.4708857834339142, |
|
"learning_rate": 1.4462392730943969e-05, |
|
"loss": 0.0656, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.5946491670873297, |
|
"grad_norm": 0.957336962223053, |
|
"learning_rate": 1.4432104997476024e-05, |
|
"loss": 0.0527, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.5996971226653205, |
|
"grad_norm": 0.6079381704330444, |
|
"learning_rate": 1.4401817264008077e-05, |
|
"loss": 0.0499, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.6047450782433117, |
|
"grad_norm": 0.644965410232544, |
|
"learning_rate": 1.437152953054013e-05, |
|
"loss": 0.0567, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.6097930338213025, |
|
"grad_norm": 0.9058682322502136, |
|
"learning_rate": 1.4341241797072186e-05, |
|
"loss": 0.059, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.614840989399293, |
|
"grad_norm": 0.6784061789512634, |
|
"learning_rate": 1.4310954063604241e-05, |
|
"loss": 0.0577, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.6198889449772844, |
|
"grad_norm": 0.7699759602546692, |
|
"learning_rate": 1.4280666330136296e-05, |
|
"loss": 0.056, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.624936900555275, |
|
"grad_norm": 1.0204094648361206, |
|
"learning_rate": 1.425037859666835e-05, |
|
"loss": 0.0595, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.629984856133266, |
|
"grad_norm": 0.3317660987377167, |
|
"learning_rate": 1.4220090863200403e-05, |
|
"loss": 0.0579, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.635032811711257, |
|
"grad_norm": 0.7586853504180908, |
|
"learning_rate": 1.4189803129732458e-05, |
|
"loss": 0.0612, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.640080767289248, |
|
"grad_norm": 0.43295013904571533, |
|
"learning_rate": 1.4159515396264513e-05, |
|
"loss": 0.0584, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.6451287228672387, |
|
"grad_norm": 0.9083705544471741, |
|
"learning_rate": 1.4129227662796568e-05, |
|
"loss": 0.0698, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.65017667844523, |
|
"grad_norm": 0.6299885511398315, |
|
"learning_rate": 1.4098939929328622e-05, |
|
"loss": 0.0602, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.6552246340232206, |
|
"grad_norm": 0.538589358329773, |
|
"learning_rate": 1.4068652195860677e-05, |
|
"loss": 0.0634, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.6602725896012114, |
|
"grad_norm": 0.5712538361549377, |
|
"learning_rate": 1.4038364462392732e-05, |
|
"loss": 0.0625, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.6653205451792026, |
|
"grad_norm": 0.5739433765411377, |
|
"learning_rate": 1.4008076728924786e-05, |
|
"loss": 0.0647, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.6703685007571933, |
|
"grad_norm": 0.5050386786460876, |
|
"learning_rate": 1.397778899545684e-05, |
|
"loss": 0.0592, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.675416456335184, |
|
"grad_norm": 0.41851407289505005, |
|
"learning_rate": 1.3947501261988894e-05, |
|
"loss": 0.0581, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.6804644119131753, |
|
"grad_norm": 0.5866436958312988, |
|
"learning_rate": 1.391721352852095e-05, |
|
"loss": 0.0656, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.685512367491166, |
|
"grad_norm": 0.47498345375061035, |
|
"learning_rate": 1.3886925795053004e-05, |
|
"loss": 0.0657, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.690560323069157, |
|
"grad_norm": 0.5748500227928162, |
|
"learning_rate": 1.385663806158506e-05, |
|
"loss": 0.0588, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.695608278647148, |
|
"grad_norm": 0.685787558555603, |
|
"learning_rate": 1.3826350328117113e-05, |
|
"loss": 0.0621, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.700656234225139, |
|
"grad_norm": 0.5321753025054932, |
|
"learning_rate": 1.3796062594649166e-05, |
|
"loss": 0.0665, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.7057041898031295, |
|
"grad_norm": 0.4687628746032715, |
|
"learning_rate": 1.3765774861181222e-05, |
|
"loss": 0.0622, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.7107521453811207, |
|
"grad_norm": 0.6931032538414001, |
|
"learning_rate": 1.3735487127713277e-05, |
|
"loss": 0.0542, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.7158001009591115, |
|
"grad_norm": 0.6347541213035583, |
|
"learning_rate": 1.3705199394245332e-05, |
|
"loss": 0.0618, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.7208480565371023, |
|
"grad_norm": 0.5090097188949585, |
|
"learning_rate": 1.3674911660777385e-05, |
|
"loss": 0.0577, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.7258960121150935, |
|
"grad_norm": 0.557161808013916, |
|
"learning_rate": 1.3644623927309439e-05, |
|
"loss": 0.0485, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.7309439676930842, |
|
"grad_norm": 0.7229135036468506, |
|
"learning_rate": 1.3614336193841494e-05, |
|
"loss": 0.0642, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.735991923271075, |
|
"grad_norm": 0.7802084684371948, |
|
"learning_rate": 1.3584048460373549e-05, |
|
"loss": 0.0721, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.741039878849066, |
|
"grad_norm": 0.8350520730018616, |
|
"learning_rate": 1.3553760726905604e-05, |
|
"loss": 0.05, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.746087834427057, |
|
"grad_norm": 0.24809196591377258, |
|
"learning_rate": 1.3523472993437658e-05, |
|
"loss": 0.0577, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.7511357900050477, |
|
"grad_norm": 0.5501554608345032, |
|
"learning_rate": 1.3493185259969713e-05, |
|
"loss": 0.0613, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.756183745583039, |
|
"grad_norm": 0.6459994912147522, |
|
"learning_rate": 1.3462897526501768e-05, |
|
"loss": 0.0545, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.7612317011610297, |
|
"grad_norm": 1.0892735719680786, |
|
"learning_rate": 1.3432609793033821e-05, |
|
"loss": 0.0517, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.7662796567390204, |
|
"grad_norm": 0.8553361296653748, |
|
"learning_rate": 1.3402322059565877e-05, |
|
"loss": 0.055, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.7713276123170116, |
|
"grad_norm": 0.5909534692764282, |
|
"learning_rate": 1.337203432609793e-05, |
|
"loss": 0.0583, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.7763755678950024, |
|
"grad_norm": 0.3620651662349701, |
|
"learning_rate": 1.3341746592629985e-05, |
|
"loss": 0.053, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.7814235234729936, |
|
"grad_norm": 0.6525430083274841, |
|
"learning_rate": 1.331145885916204e-05, |
|
"loss": 0.0667, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.7864714790509844, |
|
"grad_norm": 0.6129066944122314, |
|
"learning_rate": 1.3281171125694095e-05, |
|
"loss": 0.0578, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.791519434628975, |
|
"grad_norm": 0.6374188661575317, |
|
"learning_rate": 1.3250883392226147e-05, |
|
"loss": 0.0598, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.7965673902069663, |
|
"grad_norm": 0.6404274702072144, |
|
"learning_rate": 1.3220595658758202e-05, |
|
"loss": 0.064, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.801615345784957, |
|
"grad_norm": 0.3882500231266022, |
|
"learning_rate": 1.3190307925290257e-05, |
|
"loss": 0.0556, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.8066633013629483, |
|
"grad_norm": 0.827498197555542, |
|
"learning_rate": 1.3160020191822313e-05, |
|
"loss": 0.056, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.811711256940939, |
|
"grad_norm": 0.5474889874458313, |
|
"learning_rate": 1.3129732458354368e-05, |
|
"loss": 0.0559, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.81675921251893, |
|
"grad_norm": 0.7505003809928894, |
|
"learning_rate": 1.3099444724886421e-05, |
|
"loss": 0.0562, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.821807168096921, |
|
"grad_norm": 0.7723977565765381, |
|
"learning_rate": 1.3069156991418476e-05, |
|
"loss": 0.0711, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.8268551236749118, |
|
"grad_norm": 0.5930567979812622, |
|
"learning_rate": 1.303886925795053e-05, |
|
"loss": 0.0666, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.8319030792529025, |
|
"grad_norm": 0.9205801486968994, |
|
"learning_rate": 1.3008581524482585e-05, |
|
"loss": 0.0635, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.8369510348308937, |
|
"grad_norm": 0.6520891189575195, |
|
"learning_rate": 1.297829379101464e-05, |
|
"loss": 0.0503, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.8419989904088845, |
|
"grad_norm": 0.697742760181427, |
|
"learning_rate": 1.2948006057546693e-05, |
|
"loss": 0.0527, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.8470469459868752, |
|
"grad_norm": 0.5600337386131287, |
|
"learning_rate": 1.2917718324078749e-05, |
|
"loss": 0.0658, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.8520949015648664, |
|
"grad_norm": 0.7648780941963196, |
|
"learning_rate": 1.2887430590610804e-05, |
|
"loss": 0.0503, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.44580090045928955, |
|
"learning_rate": 1.2857142857142857e-05, |
|
"loss": 0.0569, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.862190812720848, |
|
"grad_norm": 0.6274628043174744, |
|
"learning_rate": 1.2826855123674912e-05, |
|
"loss": 0.0544, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.867238768298839, |
|
"grad_norm": 0.5967713594436646, |
|
"learning_rate": 1.2796567390206966e-05, |
|
"loss": 0.049, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.87228672387683, |
|
"grad_norm": 0.49563518166542053, |
|
"learning_rate": 1.2766279656739021e-05, |
|
"loss": 0.0637, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.8773346794548207, |
|
"grad_norm": 0.5065841674804688, |
|
"learning_rate": 1.2735991923271076e-05, |
|
"loss": 0.0635, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.882382635032812, |
|
"grad_norm": 0.4228837490081787, |
|
"learning_rate": 1.2705704189803131e-05, |
|
"loss": 0.0561, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.8874305906108026, |
|
"grad_norm": 0.36254429817199707, |
|
"learning_rate": 1.2675416456335183e-05, |
|
"loss": 0.0564, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.8924785461887934, |
|
"grad_norm": 0.6964749097824097, |
|
"learning_rate": 1.2645128722867238e-05, |
|
"loss": 0.0566, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.8975265017667846, |
|
"grad_norm": 1.2399131059646606, |
|
"learning_rate": 1.2614840989399293e-05, |
|
"loss": 0.0528, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.9025744573447754, |
|
"grad_norm": 0.45011046528816223, |
|
"learning_rate": 1.2584553255931348e-05, |
|
"loss": 0.0605, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.907622412922766, |
|
"grad_norm": 0.6450422406196594, |
|
"learning_rate": 1.2554265522463404e-05, |
|
"loss": 0.0579, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.9126703685007573, |
|
"grad_norm": 0.6685008406639099, |
|
"learning_rate": 1.2523977788995457e-05, |
|
"loss": 0.0596, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.917718324078748, |
|
"grad_norm": 0.7710725665092468, |
|
"learning_rate": 1.2493690055527512e-05, |
|
"loss": 0.063, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.922766279656739, |
|
"grad_norm": 0.6229269504547119, |
|
"learning_rate": 1.2463402322059566e-05, |
|
"loss": 0.0542, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.92781423523473, |
|
"grad_norm": 0.41364407539367676, |
|
"learning_rate": 1.243311458859162e-05, |
|
"loss": 0.0588, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.932862190812721, |
|
"grad_norm": 0.5546961426734924, |
|
"learning_rate": 1.2402826855123676e-05, |
|
"loss": 0.0607, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.9379101463907116, |
|
"grad_norm": 0.6814476251602173, |
|
"learning_rate": 1.237253912165573e-05, |
|
"loss": 0.0587, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.9429581019687028, |
|
"grad_norm": 0.7745892405509949, |
|
"learning_rate": 1.2342251388187784e-05, |
|
"loss": 0.0484, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.9480060575466935, |
|
"grad_norm": 0.9947149157524109, |
|
"learning_rate": 1.231196365471984e-05, |
|
"loss": 0.056, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.9530540131246843, |
|
"grad_norm": 0.599892258644104, |
|
"learning_rate": 1.2281675921251893e-05, |
|
"loss": 0.0603, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.9581019687026755, |
|
"grad_norm": 0.4991750121116638, |
|
"learning_rate": 1.2251388187783947e-05, |
|
"loss": 0.0603, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.9631499242806663, |
|
"grad_norm": 0.44697603583335876, |
|
"learning_rate": 1.2221100454316002e-05, |
|
"loss": 0.0614, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.968197879858657, |
|
"grad_norm": 0.34608447551727295, |
|
"learning_rate": 1.2190812720848057e-05, |
|
"loss": 0.0633, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.973245835436648, |
|
"grad_norm": 0.6991161108016968, |
|
"learning_rate": 1.2160524987380112e-05, |
|
"loss": 0.0713, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.978293791014639, |
|
"grad_norm": 0.7053156495094299, |
|
"learning_rate": 1.2130237253912167e-05, |
|
"loss": 0.0642, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.9833417465926297, |
|
"grad_norm": 0.4541454315185547, |
|
"learning_rate": 1.209994952044422e-05, |
|
"loss": 0.0583, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.988389702170621, |
|
"grad_norm": 0.5963706970214844, |
|
"learning_rate": 1.2069661786976274e-05, |
|
"loss": 0.0551, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.9934376577486117, |
|
"grad_norm": 0.37611526250839233, |
|
"learning_rate": 1.2039374053508329e-05, |
|
"loss": 0.0551, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.9984856133266025, |
|
"grad_norm": 0.5949448943138123, |
|
"learning_rate": 1.2009086320040384e-05, |
|
"loss": 0.0615, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9705180789481339, |
|
"eval_loss": 0.04155249148607254, |
|
"eval_runtime": 582.0561, |
|
"eval_samples_per_second": 354.368, |
|
"eval_steps_per_second": 2.769, |
|
"step": 5943 |
|
}, |
|
{ |
|
"epoch": 3.0035335689045937, |
|
"grad_norm": 0.732612133026123, |
|
"learning_rate": 1.197879858657244e-05, |
|
"loss": 0.0473, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 3.0085815244825844, |
|
"grad_norm": 0.8803137540817261, |
|
"learning_rate": 1.1948510853104493e-05, |
|
"loss": 0.0513, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 3.0136294800605756, |
|
"grad_norm": 0.5578094720840454, |
|
"learning_rate": 1.1918223119636548e-05, |
|
"loss": 0.0603, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 3.0186774356385664, |
|
"grad_norm": 0.9948665499687195, |
|
"learning_rate": 1.1887935386168601e-05, |
|
"loss": 0.0592, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 3.023725391216557, |
|
"grad_norm": 0.6967259049415588, |
|
"learning_rate": 1.1857647652700657e-05, |
|
"loss": 0.0741, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 3.0287733467945483, |
|
"grad_norm": 0.48011064529418945, |
|
"learning_rate": 1.182735991923271e-05, |
|
"loss": 0.055, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.033821302372539, |
|
"grad_norm": 0.663847804069519, |
|
"learning_rate": 1.1797072185764765e-05, |
|
"loss": 0.0591, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 3.03886925795053, |
|
"grad_norm": 0.589154839515686, |
|
"learning_rate": 1.176678445229682e-05, |
|
"loss": 0.0508, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 3.043917213528521, |
|
"grad_norm": 0.7075181007385254, |
|
"learning_rate": 1.1736496718828875e-05, |
|
"loss": 0.0493, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 3.048965169106512, |
|
"grad_norm": 0.6230030655860901, |
|
"learning_rate": 1.1706208985360929e-05, |
|
"loss": 0.0589, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 3.0540131246845026, |
|
"grad_norm": 0.6204888820648193, |
|
"learning_rate": 1.1675921251892982e-05, |
|
"loss": 0.0602, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 3.059061080262494, |
|
"grad_norm": 0.456939160823822, |
|
"learning_rate": 1.1645633518425038e-05, |
|
"loss": 0.059, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 3.0641090358404846, |
|
"grad_norm": 0.7607660889625549, |
|
"learning_rate": 1.1615345784957093e-05, |
|
"loss": 0.0488, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 3.0691569914184753, |
|
"grad_norm": 1.2064040899276733, |
|
"learning_rate": 1.1585058051489148e-05, |
|
"loss": 0.0695, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 3.0742049469964665, |
|
"grad_norm": 0.5143324732780457, |
|
"learning_rate": 1.1554770318021203e-05, |
|
"loss": 0.0606, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 3.0792529025744573, |
|
"grad_norm": 0.6567758917808533, |
|
"learning_rate": 1.1524482584553256e-05, |
|
"loss": 0.0581, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.0843008581524485, |
|
"grad_norm": 0.7469787001609802, |
|
"learning_rate": 1.149419485108531e-05, |
|
"loss": 0.0535, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 3.0893488137304392, |
|
"grad_norm": 0.40161028504371643, |
|
"learning_rate": 1.1463907117617365e-05, |
|
"loss": 0.056, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 3.09439676930843, |
|
"grad_norm": 0.7404605150222778, |
|
"learning_rate": 1.143361938414942e-05, |
|
"loss": 0.0471, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 3.099444724886421, |
|
"grad_norm": 0.8587531447410583, |
|
"learning_rate": 1.1403331650681475e-05, |
|
"loss": 0.0558, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 3.104492680464412, |
|
"grad_norm": 0.424450159072876, |
|
"learning_rate": 1.1373043917213529e-05, |
|
"loss": 0.0558, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 3.1095406360424027, |
|
"grad_norm": 0.9383788704872131, |
|
"learning_rate": 1.1342756183745584e-05, |
|
"loss": 0.0517, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 3.114588591620394, |
|
"grad_norm": 0.8069589734077454, |
|
"learning_rate": 1.1312468450277637e-05, |
|
"loss": 0.0588, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 3.1196365471983847, |
|
"grad_norm": 0.8677689433097839, |
|
"learning_rate": 1.1282180716809692e-05, |
|
"loss": 0.0611, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 3.1246845027763754, |
|
"grad_norm": 0.7949932813644409, |
|
"learning_rate": 1.1251892983341746e-05, |
|
"loss": 0.0553, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 3.1297324583543666, |
|
"grad_norm": 0.6563514471054077, |
|
"learning_rate": 1.1221605249873801e-05, |
|
"loss": 0.0549, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.1347804139323574, |
|
"grad_norm": 0.5856168866157532, |
|
"learning_rate": 1.1191317516405856e-05, |
|
"loss": 0.0585, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 3.139828369510348, |
|
"grad_norm": 0.6840217709541321, |
|
"learning_rate": 1.1161029782937911e-05, |
|
"loss": 0.0683, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 3.1448763250883394, |
|
"grad_norm": 1.310652494430542, |
|
"learning_rate": 1.1130742049469966e-05, |
|
"loss": 0.057, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 3.14992428066633, |
|
"grad_norm": 0.6700050830841064, |
|
"learning_rate": 1.1100454316002018e-05, |
|
"loss": 0.0562, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 3.154972236244321, |
|
"grad_norm": 0.5210493803024292, |
|
"learning_rate": 1.1070166582534073e-05, |
|
"loss": 0.0545, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 3.160020191822312, |
|
"grad_norm": 0.44693487882614136, |
|
"learning_rate": 1.1039878849066128e-05, |
|
"loss": 0.0614, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 3.165068147400303, |
|
"grad_norm": 0.8827401995658875, |
|
"learning_rate": 1.1009591115598184e-05, |
|
"loss": 0.06, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 3.1701161029782936, |
|
"grad_norm": 0.29074421525001526, |
|
"learning_rate": 1.0979303382130239e-05, |
|
"loss": 0.059, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 3.175164058556285, |
|
"grad_norm": 0.8659618496894836, |
|
"learning_rate": 1.0949015648662292e-05, |
|
"loss": 0.0541, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 3.1802120141342756, |
|
"grad_norm": 0.8624622821807861, |
|
"learning_rate": 1.0918727915194346e-05, |
|
"loss": 0.0661, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.1852599697122663, |
|
"grad_norm": 0.6411763429641724, |
|
"learning_rate": 1.08884401817264e-05, |
|
"loss": 0.0642, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 3.1903079252902575, |
|
"grad_norm": 0.5271298289299011, |
|
"learning_rate": 1.0858152448258456e-05, |
|
"loss": 0.0552, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 3.1953558808682483, |
|
"grad_norm": 0.9701720476150513, |
|
"learning_rate": 1.082786471479051e-05, |
|
"loss": 0.0586, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 3.200403836446239, |
|
"grad_norm": 0.5633390545845032, |
|
"learning_rate": 1.0797576981322565e-05, |
|
"loss": 0.0554, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 3.2054517920242303, |
|
"grad_norm": 0.45846840739250183, |
|
"learning_rate": 1.076728924785462e-05, |
|
"loss": 0.0582, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 3.210499747602221, |
|
"grad_norm": 0.43338650465011597, |
|
"learning_rate": 1.0737001514386673e-05, |
|
"loss": 0.0588, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 3.215547703180212, |
|
"grad_norm": 0.8287716507911682, |
|
"learning_rate": 1.0706713780918728e-05, |
|
"loss": 0.053, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 3.220595658758203, |
|
"grad_norm": 0.5174350142478943, |
|
"learning_rate": 1.0676426047450782e-05, |
|
"loss": 0.0587, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 3.2256436143361937, |
|
"grad_norm": 0.47460228204727173, |
|
"learning_rate": 1.0646138313982837e-05, |
|
"loss": 0.0598, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 3.230691569914185, |
|
"grad_norm": 0.49122539162635803, |
|
"learning_rate": 1.0615850580514892e-05, |
|
"loss": 0.0535, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.2357395254921757, |
|
"grad_norm": 0.5462148189544678, |
|
"learning_rate": 1.0585562847046947e-05, |
|
"loss": 0.0518, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 3.2407874810701665, |
|
"grad_norm": 0.7671846747398376, |
|
"learning_rate": 1.0555275113579002e-05, |
|
"loss": 0.0611, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 3.2458354366481577, |
|
"grad_norm": 0.6748913526535034, |
|
"learning_rate": 1.0524987380111054e-05, |
|
"loss": 0.0561, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 3.2508833922261484, |
|
"grad_norm": 0.5004613399505615, |
|
"learning_rate": 1.049469964664311e-05, |
|
"loss": 0.0534, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 3.255931347804139, |
|
"grad_norm": 0.4895551800727844, |
|
"learning_rate": 1.0464411913175164e-05, |
|
"loss": 0.0459, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 3.2609793033821304, |
|
"grad_norm": 0.47480469942092896, |
|
"learning_rate": 1.043412417970722e-05, |
|
"loss": 0.0601, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 3.266027258960121, |
|
"grad_norm": 0.4885694086551666, |
|
"learning_rate": 1.0403836446239273e-05, |
|
"loss": 0.0598, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 3.271075214538112, |
|
"grad_norm": 0.6375486254692078, |
|
"learning_rate": 1.0373548712771328e-05, |
|
"loss": 0.0602, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 3.276123170116103, |
|
"grad_norm": 0.7264606356620789, |
|
"learning_rate": 1.0343260979303382e-05, |
|
"loss": 0.0579, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 3.281171125694094, |
|
"grad_norm": 0.5704456567764282, |
|
"learning_rate": 1.0312973245835437e-05, |
|
"loss": 0.056, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.2862190812720846, |
|
"grad_norm": 0.6324512362480164, |
|
"learning_rate": 1.0282685512367492e-05, |
|
"loss": 0.0515, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 3.291267036850076, |
|
"grad_norm": 0.5736483931541443, |
|
"learning_rate": 1.0252397778899545e-05, |
|
"loss": 0.0538, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 3.2963149924280666, |
|
"grad_norm": 0.48032522201538086, |
|
"learning_rate": 1.02221100454316e-05, |
|
"loss": 0.0568, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 3.301362948006058, |
|
"grad_norm": 0.6696997880935669, |
|
"learning_rate": 1.0191822311963656e-05, |
|
"loss": 0.0537, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.3064109035840485, |
|
"grad_norm": 0.44333356618881226, |
|
"learning_rate": 1.016153457849571e-05, |
|
"loss": 0.0514, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 3.3114588591620393, |
|
"grad_norm": 0.6224443912506104, |
|
"learning_rate": 1.0131246845027764e-05, |
|
"loss": 0.0607, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 3.3165068147400305, |
|
"grad_norm": 0.7066437602043152, |
|
"learning_rate": 1.0100959111559818e-05, |
|
"loss": 0.0563, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 3.3215547703180213, |
|
"grad_norm": 0.6406083106994629, |
|
"learning_rate": 1.0070671378091873e-05, |
|
"loss": 0.0573, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 3.326602725896012, |
|
"grad_norm": 0.44534462690353394, |
|
"learning_rate": 1.0040383644623928e-05, |
|
"loss": 0.059, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 3.3316506814740032, |
|
"grad_norm": 0.7137624025344849, |
|
"learning_rate": 1.0010095911155983e-05, |
|
"loss": 0.0568, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.336698637051994, |
|
"grad_norm": 0.6909269690513611, |
|
"learning_rate": 9.979808177688038e-06, |
|
"loss": 0.0493, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 3.3417465926299847, |
|
"grad_norm": 0.6987153887748718, |
|
"learning_rate": 9.94952044422009e-06, |
|
"loss": 0.059, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 3.346794548207976, |
|
"grad_norm": 0.538732647895813, |
|
"learning_rate": 9.919232710752145e-06, |
|
"loss": 0.0582, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 3.3518425037859667, |
|
"grad_norm": 0.6330693960189819, |
|
"learning_rate": 9.8889449772842e-06, |
|
"loss": 0.0506, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 3.3568904593639575, |
|
"grad_norm": 0.5216783881187439, |
|
"learning_rate": 9.858657243816255e-06, |
|
"loss": 0.0544, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.3619384149419487, |
|
"grad_norm": 0.7052462697029114, |
|
"learning_rate": 9.828369510348309e-06, |
|
"loss": 0.0553, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 3.3669863705199394, |
|
"grad_norm": 0.7679615616798401, |
|
"learning_rate": 9.798081776880364e-06, |
|
"loss": 0.061, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 3.37203432609793, |
|
"grad_norm": 0.530564546585083, |
|
"learning_rate": 9.767794043412417e-06, |
|
"loss": 0.0567, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 3.3770822816759214, |
|
"grad_norm": 0.6907301545143127, |
|
"learning_rate": 9.737506309944473e-06, |
|
"loss": 0.0561, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 3.382130237253912, |
|
"grad_norm": 0.7837420105934143, |
|
"learning_rate": 9.707218576476528e-06, |
|
"loss": 0.0618, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.387178192831903, |
|
"grad_norm": 0.6361984014511108, |
|
"learning_rate": 9.676930843008581e-06, |
|
"loss": 0.0533, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 3.392226148409894, |
|
"grad_norm": 0.6775834560394287, |
|
"learning_rate": 9.646643109540636e-06, |
|
"loss": 0.0571, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 3.397274103987885, |
|
"grad_norm": 0.4820801615715027, |
|
"learning_rate": 9.616355376072691e-06, |
|
"loss": 0.063, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 3.4023220595658756, |
|
"grad_norm": 0.511091411113739, |
|
"learning_rate": 9.586067642604747e-06, |
|
"loss": 0.0621, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 3.407370015143867, |
|
"grad_norm": 0.5163900852203369, |
|
"learning_rate": 9.5557799091368e-06, |
|
"loss": 0.0606, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.4124179707218576, |
|
"grad_norm": 0.4652441740036011, |
|
"learning_rate": 9.525492175668853e-06, |
|
"loss": 0.0539, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 3.4174659262998484, |
|
"grad_norm": 0.5968872904777527, |
|
"learning_rate": 9.495204442200909e-06, |
|
"loss": 0.0599, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 3.4225138818778396, |
|
"grad_norm": 0.4634818732738495, |
|
"learning_rate": 9.464916708732964e-06, |
|
"loss": 0.0518, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 3.4275618374558303, |
|
"grad_norm": 0.34169018268585205, |
|
"learning_rate": 9.434628975265019e-06, |
|
"loss": 0.0588, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 3.432609793033821, |
|
"grad_norm": 0.719494640827179, |
|
"learning_rate": 9.404341241797072e-06, |
|
"loss": 0.0538, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.4376577486118123, |
|
"grad_norm": 0.4465346336364746, |
|
"learning_rate": 9.374053508329126e-06, |
|
"loss": 0.0577, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 3.442705704189803, |
|
"grad_norm": 0.6223052740097046, |
|
"learning_rate": 9.343765774861181e-06, |
|
"loss": 0.0598, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 3.447753659767794, |
|
"grad_norm": 0.6854692697525024, |
|
"learning_rate": 9.313478041393236e-06, |
|
"loss": 0.0544, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 3.452801615345785, |
|
"grad_norm": 1.0640225410461426, |
|
"learning_rate": 9.283190307925291e-06, |
|
"loss": 0.0569, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 3.4578495709237758, |
|
"grad_norm": 0.5437650680541992, |
|
"learning_rate": 9.252902574457345e-06, |
|
"loss": 0.0612, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.462897526501767, |
|
"grad_norm": 0.5767130255699158, |
|
"learning_rate": 9.2226148409894e-06, |
|
"loss": 0.0618, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 3.4679454820797577, |
|
"grad_norm": 0.5814956426620483, |
|
"learning_rate": 9.192327107521453e-06, |
|
"loss": 0.0571, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 3.4729934376577485, |
|
"grad_norm": 0.31469887495040894, |
|
"learning_rate": 9.162039374053508e-06, |
|
"loss": 0.0573, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 3.4780413932357397, |
|
"grad_norm": 0.3987484872341156, |
|
"learning_rate": 9.131751640585563e-06, |
|
"loss": 0.0534, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 3.4830893488137304, |
|
"grad_norm": 0.47312065958976746, |
|
"learning_rate": 9.101463907117617e-06, |
|
"loss": 0.0608, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.488137304391721, |
|
"grad_norm": 0.4635220170021057, |
|
"learning_rate": 9.071176173649672e-06, |
|
"loss": 0.05, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 3.4931852599697124, |
|
"grad_norm": 1.146721363067627, |
|
"learning_rate": 9.040888440181727e-06, |
|
"loss": 0.0548, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 3.498233215547703, |
|
"grad_norm": 0.42057961225509644, |
|
"learning_rate": 9.010600706713782e-06, |
|
"loss": 0.0463, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 3.5032811711256944, |
|
"grad_norm": 0.7835047841072083, |
|
"learning_rate": 8.980312973245836e-06, |
|
"loss": 0.0507, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 3.508329126703685, |
|
"grad_norm": 0.6441161036491394, |
|
"learning_rate": 8.95002523977789e-06, |
|
"loss": 0.0571, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 3.513377082281676, |
|
"grad_norm": 0.6828143000602722, |
|
"learning_rate": 8.919737506309944e-06, |
|
"loss": 0.0525, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 3.518425037859667, |
|
"grad_norm": 0.8285954594612122, |
|
"learning_rate": 8.889449772842e-06, |
|
"loss": 0.0621, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 3.523472993437658, |
|
"grad_norm": 0.4954177439212799, |
|
"learning_rate": 8.859162039374055e-06, |
|
"loss": 0.0625, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 3.5285209490156486, |
|
"grad_norm": 0.7900820374488831, |
|
"learning_rate": 8.828874305906108e-06, |
|
"loss": 0.0603, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 3.53356890459364, |
|
"grad_norm": 0.6767242550849915, |
|
"learning_rate": 8.798586572438162e-06, |
|
"loss": 0.0586, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.5386168601716306, |
|
"grad_norm": 0.5408624410629272, |
|
"learning_rate": 8.768298838970217e-06, |
|
"loss": 0.0561, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 3.5436648157496213, |
|
"grad_norm": 0.4577973484992981, |
|
"learning_rate": 8.738011105502272e-06, |
|
"loss": 0.057, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 3.5487127713276125, |
|
"grad_norm": 0.7334242463111877, |
|
"learning_rate": 8.707723372034327e-06, |
|
"loss": 0.0602, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 3.5537607269056033, |
|
"grad_norm": 0.5569146275520325, |
|
"learning_rate": 8.67743563856638e-06, |
|
"loss": 0.0564, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 3.558808682483594, |
|
"grad_norm": 0.5739743709564209, |
|
"learning_rate": 8.647147905098436e-06, |
|
"loss": 0.0605, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 3.5638566380615853, |
|
"grad_norm": 0.5553867816925049, |
|
"learning_rate": 8.61686017163049e-06, |
|
"loss": 0.0573, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 3.568904593639576, |
|
"grad_norm": 0.7109550833702087, |
|
"learning_rate": 8.586572438162544e-06, |
|
"loss": 0.0634, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 3.5739525492175668, |
|
"grad_norm": 0.46534502506256104, |
|
"learning_rate": 8.5562847046946e-06, |
|
"loss": 0.0494, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 3.579000504795558, |
|
"grad_norm": 0.47850191593170166, |
|
"learning_rate": 8.525996971226653e-06, |
|
"loss": 0.0613, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 3.5840484603735487, |
|
"grad_norm": 0.3749614953994751, |
|
"learning_rate": 8.495709237758708e-06, |
|
"loss": 0.0574, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.5890964159515395, |
|
"grad_norm": 0.5852258801460266, |
|
"learning_rate": 8.465421504290763e-06, |
|
"loss": 0.064, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 3.5941443715295307, |
|
"grad_norm": 0.3820860981941223, |
|
"learning_rate": 8.435133770822818e-06, |
|
"loss": 0.0559, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 3.5991923271075215, |
|
"grad_norm": 0.5200080275535583, |
|
"learning_rate": 8.40484603735487e-06, |
|
"loss": 0.0556, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 3.604240282685512, |
|
"grad_norm": 0.6472256183624268, |
|
"learning_rate": 8.374558303886925e-06, |
|
"loss": 0.0596, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 3.6092882382635034, |
|
"grad_norm": 0.43182119727134705, |
|
"learning_rate": 8.34427057041898e-06, |
|
"loss": 0.0478, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 3.614336193841494, |
|
"grad_norm": 0.6659020781517029, |
|
"learning_rate": 8.313982836951035e-06, |
|
"loss": 0.054, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 3.619384149419485, |
|
"grad_norm": 0.6561934947967529, |
|
"learning_rate": 8.28369510348309e-06, |
|
"loss": 0.0583, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 3.624432104997476, |
|
"grad_norm": 0.7083423733711243, |
|
"learning_rate": 8.253407370015144e-06, |
|
"loss": 0.0598, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 3.629480060575467, |
|
"grad_norm": 0.6030146479606628, |
|
"learning_rate": 8.223119636547197e-06, |
|
"loss": 0.0569, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 3.6345280161534577, |
|
"grad_norm": 0.4650856554508209, |
|
"learning_rate": 8.192831903079253e-06, |
|
"loss": 0.0593, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.639575971731449, |
|
"grad_norm": 0.5656235814094543, |
|
"learning_rate": 8.162544169611308e-06, |
|
"loss": 0.058, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 3.6446239273094396, |
|
"grad_norm": 0.5745735764503479, |
|
"learning_rate": 8.132256436143363e-06, |
|
"loss": 0.0582, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 3.6496718828874304, |
|
"grad_norm": 0.7879515886306763, |
|
"learning_rate": 8.101968702675416e-06, |
|
"loss": 0.0593, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 3.6547198384654216, |
|
"grad_norm": 0.7000477313995361, |
|
"learning_rate": 8.071680969207471e-06, |
|
"loss": 0.0517, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 3.6597677940434123, |
|
"grad_norm": 0.44397464394569397, |
|
"learning_rate": 8.041393235739527e-06, |
|
"loss": 0.0569, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.664815749621403, |
|
"grad_norm": 0.55961674451828, |
|
"learning_rate": 8.01110550227158e-06, |
|
"loss": 0.0529, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 3.6698637051993943, |
|
"grad_norm": 0.5441805720329285, |
|
"learning_rate": 7.980817768803635e-06, |
|
"loss": 0.0537, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 3.674911660777385, |
|
"grad_norm": 0.5779780149459839, |
|
"learning_rate": 7.950530035335689e-06, |
|
"loss": 0.0549, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 3.679959616355376, |
|
"grad_norm": 0.4491129517555237, |
|
"learning_rate": 7.920242301867744e-06, |
|
"loss": 0.0527, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 3.685007571933367, |
|
"grad_norm": 0.6601787209510803, |
|
"learning_rate": 7.889954568399799e-06, |
|
"loss": 0.0545, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.690055527511358, |
|
"grad_norm": 0.7920609712600708, |
|
"learning_rate": 7.859666834931854e-06, |
|
"loss": 0.0607, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 3.6951034830893486, |
|
"grad_norm": 0.6220458149909973, |
|
"learning_rate": 7.829379101463906e-06, |
|
"loss": 0.0574, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 3.7001514386673398, |
|
"grad_norm": 0.6900739669799805, |
|
"learning_rate": 7.799091367995961e-06, |
|
"loss": 0.0549, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 3.7051993942453305, |
|
"grad_norm": 1.071191430091858, |
|
"learning_rate": 7.768803634528016e-06, |
|
"loss": 0.0644, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 3.7102473498233217, |
|
"grad_norm": 0.5342854261398315, |
|
"learning_rate": 7.738515901060071e-06, |
|
"loss": 0.0639, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 3.7152953054013125, |
|
"grad_norm": 0.49695709347724915, |
|
"learning_rate": 7.708228167592126e-06, |
|
"loss": 0.0525, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 3.7203432609793032, |
|
"grad_norm": 0.6041547060012817, |
|
"learning_rate": 7.67794043412418e-06, |
|
"loss": 0.0596, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 3.7253912165572944, |
|
"grad_norm": 0.6425964832305908, |
|
"learning_rate": 7.647652700656235e-06, |
|
"loss": 0.0626, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 3.730439172135285, |
|
"grad_norm": 0.5185597538948059, |
|
"learning_rate": 7.617364967188288e-06, |
|
"loss": 0.063, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 3.7354871277132764, |
|
"grad_norm": 0.48031681776046753, |
|
"learning_rate": 7.587077233720343e-06, |
|
"loss": 0.0633, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.740535083291267, |
|
"grad_norm": 0.46377626061439514, |
|
"learning_rate": 7.556789500252398e-06, |
|
"loss": 0.0581, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 3.745583038869258, |
|
"grad_norm": 0.7336452007293701, |
|
"learning_rate": 7.526501766784453e-06, |
|
"loss": 0.0572, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 3.750630994447249, |
|
"grad_norm": 0.8720684051513672, |
|
"learning_rate": 7.4962140333165064e-06, |
|
"loss": 0.0558, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 3.75567895002524, |
|
"grad_norm": 0.372592031955719, |
|
"learning_rate": 7.465926299848562e-06, |
|
"loss": 0.0613, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 3.7607269056032306, |
|
"grad_norm": 0.5049020648002625, |
|
"learning_rate": 7.435638566380616e-06, |
|
"loss": 0.058, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 3.765774861181222, |
|
"grad_norm": 0.5402325391769409, |
|
"learning_rate": 7.405350832912671e-06, |
|
"loss": 0.0484, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 3.7708228167592126, |
|
"grad_norm": 0.5662652850151062, |
|
"learning_rate": 7.375063099444725e-06, |
|
"loss": 0.0613, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 3.7758707723372034, |
|
"grad_norm": 0.6431825160980225, |
|
"learning_rate": 7.34477536597678e-06, |
|
"loss": 0.0522, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 3.7809187279151946, |
|
"grad_norm": 0.9309275150299072, |
|
"learning_rate": 7.314487632508835e-06, |
|
"loss": 0.0602, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 3.7859666834931853, |
|
"grad_norm": 0.801145076751709, |
|
"learning_rate": 7.284199899040888e-06, |
|
"loss": 0.0581, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.791014639071176, |
|
"grad_norm": 0.5122712850570679, |
|
"learning_rate": 7.253912165572943e-06, |
|
"loss": 0.0552, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 3.7960625946491673, |
|
"grad_norm": 0.39402052760124207, |
|
"learning_rate": 7.223624432104998e-06, |
|
"loss": 0.0552, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 3.801110550227158, |
|
"grad_norm": 0.5302004814147949, |
|
"learning_rate": 7.193336698637052e-06, |
|
"loss": 0.0626, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 3.806158505805149, |
|
"grad_norm": 0.4123098850250244, |
|
"learning_rate": 7.163048965169107e-06, |
|
"loss": 0.0569, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 3.81120646138314, |
|
"grad_norm": 0.8736279010772705, |
|
"learning_rate": 7.132761231701161e-06, |
|
"loss": 0.0537, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 3.8162544169611308, |
|
"grad_norm": 0.4374080002307892, |
|
"learning_rate": 7.102473498233216e-06, |
|
"loss": 0.057, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 3.8213023725391215, |
|
"grad_norm": 0.863776445388794, |
|
"learning_rate": 7.07218576476527e-06, |
|
"loss": 0.049, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 3.8263503281171127, |
|
"grad_norm": 0.5356324315071106, |
|
"learning_rate": 7.041898031297325e-06, |
|
"loss": 0.0578, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 3.8313982836951035, |
|
"grad_norm": 0.5422727465629578, |
|
"learning_rate": 7.0116102978293786e-06, |
|
"loss": 0.0577, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 3.8364462392730942, |
|
"grad_norm": 0.6234108805656433, |
|
"learning_rate": 6.981322564361434e-06, |
|
"loss": 0.0573, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.8414941948510855, |
|
"grad_norm": 0.9067860841751099, |
|
"learning_rate": 6.951034830893489e-06, |
|
"loss": 0.0471, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 3.846542150429076, |
|
"grad_norm": 0.5522469878196716, |
|
"learning_rate": 6.920747097425543e-06, |
|
"loss": 0.053, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 3.851590106007067, |
|
"grad_norm": 0.7358270287513733, |
|
"learning_rate": 6.8904593639575974e-06, |
|
"loss": 0.0561, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 3.856638061585058, |
|
"grad_norm": 0.5285794138908386, |
|
"learning_rate": 6.860171630489652e-06, |
|
"loss": 0.0618, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 3.861686017163049, |
|
"grad_norm": 0.6937068700790405, |
|
"learning_rate": 6.829883897021707e-06, |
|
"loss": 0.059, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 3.8667339727410397, |
|
"grad_norm": 0.6941738724708557, |
|
"learning_rate": 6.79959616355376e-06, |
|
"loss": 0.0515, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 3.871781928319031, |
|
"grad_norm": 0.8964054584503174, |
|
"learning_rate": 6.7693084300858155e-06, |
|
"loss": 0.0526, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 3.8768298838970217, |
|
"grad_norm": 0.5919986367225647, |
|
"learning_rate": 6.739020696617871e-06, |
|
"loss": 0.0577, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 3.8818778394750124, |
|
"grad_norm": 0.4616561532020569, |
|
"learning_rate": 6.708732963149924e-06, |
|
"loss": 0.0509, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 3.8869257950530036, |
|
"grad_norm": 0.6349731087684631, |
|
"learning_rate": 6.678445229681979e-06, |
|
"loss": 0.0535, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.8919737506309944, |
|
"grad_norm": 0.6474828720092773, |
|
"learning_rate": 6.6481574962140335e-06, |
|
"loss": 0.0552, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 3.897021706208985, |
|
"grad_norm": 0.5433930158615112, |
|
"learning_rate": 6.617869762746088e-06, |
|
"loss": 0.062, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 3.9020696617869763, |
|
"grad_norm": 0.6113614439964294, |
|
"learning_rate": 6.587582029278142e-06, |
|
"loss": 0.06, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 3.907117617364967, |
|
"grad_norm": 0.8800488114356995, |
|
"learning_rate": 6.557294295810197e-06, |
|
"loss": 0.0578, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 3.912165572942958, |
|
"grad_norm": 0.5158660411834717, |
|
"learning_rate": 6.5270065623422515e-06, |
|
"loss": 0.0524, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.917213528520949, |
|
"grad_norm": 0.5676606297492981, |
|
"learning_rate": 6.496718828874306e-06, |
|
"loss": 0.0474, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 3.92226148409894, |
|
"grad_norm": 0.6438203454017639, |
|
"learning_rate": 6.466431095406361e-06, |
|
"loss": 0.0587, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 3.9273094396769306, |
|
"grad_norm": 0.6570119857788086, |
|
"learning_rate": 6.436143361938415e-06, |
|
"loss": 0.0489, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 3.932357395254922, |
|
"grad_norm": 0.5620145201683044, |
|
"learning_rate": 6.4058556284704695e-06, |
|
"loss": 0.0559, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 3.9374053508329125, |
|
"grad_norm": 0.6886317729949951, |
|
"learning_rate": 6.375567895002524e-06, |
|
"loss": 0.0581, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.9424533064109037, |
|
"grad_norm": 0.7463077306747437, |
|
"learning_rate": 6.345280161534579e-06, |
|
"loss": 0.0477, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 3.9475012619888945, |
|
"grad_norm": 0.5246394276618958, |
|
"learning_rate": 6.314992428066633e-06, |
|
"loss": 0.0501, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 3.9525492175668853, |
|
"grad_norm": 0.5147930979728699, |
|
"learning_rate": 6.2847046945986876e-06, |
|
"loss": 0.0603, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 3.9575971731448765, |
|
"grad_norm": 0.3963003158569336, |
|
"learning_rate": 6.254416961130743e-06, |
|
"loss": 0.059, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 3.9626451287228672, |
|
"grad_norm": 0.7148598432540894, |
|
"learning_rate": 6.224129227662796e-06, |
|
"loss": 0.0524, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 3.967693084300858, |
|
"grad_norm": 0.5985211133956909, |
|
"learning_rate": 6.193841494194851e-06, |
|
"loss": 0.0609, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 3.972741039878849, |
|
"grad_norm": 0.6152123808860779, |
|
"learning_rate": 6.163553760726906e-06, |
|
"loss": 0.0622, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 3.97778899545684, |
|
"grad_norm": 0.49580270051956177, |
|
"learning_rate": 6.13326602725896e-06, |
|
"loss": 0.056, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 3.982836951034831, |
|
"grad_norm": 0.8874292373657227, |
|
"learning_rate": 6.102978293791015e-06, |
|
"loss": 0.0599, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 3.987884906612822, |
|
"grad_norm": 0.6198350787162781, |
|
"learning_rate": 6.072690560323069e-06, |
|
"loss": 0.0546, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.9929328621908127, |
|
"grad_norm": 0.39257192611694336, |
|
"learning_rate": 6.042402826855124e-06, |
|
"loss": 0.0523, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 3.997980817768804, |
|
"grad_norm": 0.4612904191017151, |
|
"learning_rate": 6.012115093387178e-06, |
|
"loss": 0.0685, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9705180789481339, |
|
"eval_loss": 0.038467586040496826, |
|
"eval_runtime": 578.9562, |
|
"eval_samples_per_second": 356.265, |
|
"eval_steps_per_second": 2.784, |
|
"step": 7924 |
|
}, |
|
{ |
|
"epoch": 4.003028773346794, |
|
"grad_norm": 0.7146291732788086, |
|
"learning_rate": 5.981827359919233e-06, |
|
"loss": 0.0575, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 4.008076728924785, |
|
"grad_norm": 0.6313480138778687, |
|
"learning_rate": 5.951539626451287e-06, |
|
"loss": 0.0581, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 4.013124684502777, |
|
"grad_norm": 0.4977870583534241, |
|
"learning_rate": 5.921251892983342e-06, |
|
"loss": 0.0582, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 4.018172640080767, |
|
"grad_norm": 0.4447147250175476, |
|
"learning_rate": 5.890964159515397e-06, |
|
"loss": 0.0544, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 4.023220595658758, |
|
"grad_norm": 0.6496310234069824, |
|
"learning_rate": 5.860676426047451e-06, |
|
"loss": 0.0595, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 4.028268551236749, |
|
"grad_norm": 0.4380001127719879, |
|
"learning_rate": 5.830388692579505e-06, |
|
"loss": 0.0549, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 4.03331650681474, |
|
"grad_norm": 0.5718368887901306, |
|
"learning_rate": 5.80010095911156e-06, |
|
"loss": 0.0559, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 4.038364462392731, |
|
"grad_norm": 0.5859358906745911, |
|
"learning_rate": 5.769813225643615e-06, |
|
"loss": 0.0572, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.043412417970722, |
|
"grad_norm": 0.49378788471221924, |
|
"learning_rate": 5.739525492175669e-06, |
|
"loss": 0.054, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 4.048460373548712, |
|
"grad_norm": 0.6780097484588623, |
|
"learning_rate": 5.709237758707723e-06, |
|
"loss": 0.0568, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 4.053508329126704, |
|
"grad_norm": 0.8048389554023743, |
|
"learning_rate": 5.6789500252397786e-06, |
|
"loss": 0.0527, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 4.058556284704695, |
|
"grad_norm": 0.4513346254825592, |
|
"learning_rate": 5.648662291771832e-06, |
|
"loss": 0.0597, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 4.063604240282685, |
|
"grad_norm": 0.6877405643463135, |
|
"learning_rate": 5.618374558303887e-06, |
|
"loss": 0.0594, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 4.068652195860676, |
|
"grad_norm": 0.41468387842178345, |
|
"learning_rate": 5.5880868248359414e-06, |
|
"loss": 0.0563, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 4.0737001514386675, |
|
"grad_norm": 0.5062978267669678, |
|
"learning_rate": 5.557799091367996e-06, |
|
"loss": 0.0598, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 4.078748107016659, |
|
"grad_norm": 0.6427041888237, |
|
"learning_rate": 5.527511357900051e-06, |
|
"loss": 0.057, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 4.083796062594649, |
|
"grad_norm": 0.5508936643600464, |
|
"learning_rate": 5.497223624432105e-06, |
|
"loss": 0.0472, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 4.08884401817264, |
|
"grad_norm": 0.39490872621536255, |
|
"learning_rate": 5.4669358909641595e-06, |
|
"loss": 0.0589, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.093891973750631, |
|
"grad_norm": 0.5776220560073853, |
|
"learning_rate": 5.436648157496214e-06, |
|
"loss": 0.0602, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 4.098939929328622, |
|
"grad_norm": 0.36714500188827515, |
|
"learning_rate": 5.406360424028269e-06, |
|
"loss": 0.0474, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 4.103987884906613, |
|
"grad_norm": 0.7429747581481934, |
|
"learning_rate": 5.376072690560323e-06, |
|
"loss": 0.0516, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 4.109035840484604, |
|
"grad_norm": 0.7167190909385681, |
|
"learning_rate": 5.3457849570923775e-06, |
|
"loss": 0.0559, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 4.1140837960625944, |
|
"grad_norm": 0.5668296217918396, |
|
"learning_rate": 5.315497223624433e-06, |
|
"loss": 0.0558, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 4.119131751640586, |
|
"grad_norm": 0.5577311515808105, |
|
"learning_rate": 5.285209490156487e-06, |
|
"loss": 0.0589, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 4.124179707218577, |
|
"grad_norm": 0.611304759979248, |
|
"learning_rate": 5.254921756688541e-06, |
|
"loss": 0.0546, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 4.129227662796567, |
|
"grad_norm": 0.5540894865989685, |
|
"learning_rate": 5.2246340232205955e-06, |
|
"loss": 0.0611, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 4.134275618374558, |
|
"grad_norm": 0.5128312706947327, |
|
"learning_rate": 5.194346289752651e-06, |
|
"loss": 0.0552, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 4.13932357395255, |
|
"grad_norm": 0.6017599105834961, |
|
"learning_rate": 5.164058556284704e-06, |
|
"loss": 0.0494, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 4.14437152953054, |
|
"grad_norm": 0.42843466997146606, |
|
"learning_rate": 5.133770822816759e-06, |
|
"loss": 0.0534, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 4.149419485108531, |
|
"grad_norm": 0.6050401926040649, |
|
"learning_rate": 5.103483089348814e-06, |
|
"loss": 0.0524, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 4.154467440686522, |
|
"grad_norm": 0.512793242931366, |
|
"learning_rate": 5.073195355880868e-06, |
|
"loss": 0.0562, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 4.159515396264513, |
|
"grad_norm": 0.5130860209465027, |
|
"learning_rate": 5.042907622412923e-06, |
|
"loss": 0.0413, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 4.164563351842504, |
|
"grad_norm": 0.6443082690238953, |
|
"learning_rate": 5.012619888944977e-06, |
|
"loss": 0.0593, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 4.169611307420495, |
|
"grad_norm": 0.6051344871520996, |
|
"learning_rate": 4.982332155477032e-06, |
|
"loss": 0.0542, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 4.174659262998485, |
|
"grad_norm": 0.5795598030090332, |
|
"learning_rate": 4.952044422009086e-06, |
|
"loss": 0.0569, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 4.1797072185764765, |
|
"grad_norm": 0.6054142117500305, |
|
"learning_rate": 4.921756688541141e-06, |
|
"loss": 0.0575, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 4.184755174154468, |
|
"grad_norm": 0.6954050660133362, |
|
"learning_rate": 4.891468955073196e-06, |
|
"loss": 0.0609, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 4.189803129732458, |
|
"grad_norm": 0.7217870354652405, |
|
"learning_rate": 4.86118122160525e-06, |
|
"loss": 0.0559, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 4.194851085310449, |
|
"grad_norm": 0.49758586287498474, |
|
"learning_rate": 4.830893488137305e-06, |
|
"loss": 0.0506, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 4.1998990408884405, |
|
"grad_norm": 0.4497081935405731, |
|
"learning_rate": 4.800605754669359e-06, |
|
"loss": 0.0581, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 4.204946996466431, |
|
"grad_norm": 0.6054022312164307, |
|
"learning_rate": 4.770318021201413e-06, |
|
"loss": 0.0596, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 4.209994952044422, |
|
"grad_norm": 0.7262012958526611, |
|
"learning_rate": 4.7400302877334685e-06, |
|
"loss": 0.0489, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 4.215042907622413, |
|
"grad_norm": 0.6226342916488647, |
|
"learning_rate": 4.709742554265523e-06, |
|
"loss": 0.0596, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 4.2200908632004035, |
|
"grad_norm": 0.8234953284263611, |
|
"learning_rate": 4.679454820797577e-06, |
|
"loss": 0.057, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 4.225138818778395, |
|
"grad_norm": 0.8438859581947327, |
|
"learning_rate": 4.649167087329631e-06, |
|
"loss": 0.0516, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 4.230186774356386, |
|
"grad_norm": 0.5095875263214111, |
|
"learning_rate": 4.6188793538616865e-06, |
|
"loss": 0.0646, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 4.235234729934376, |
|
"grad_norm": 0.5543855428695679, |
|
"learning_rate": 4.58859162039374e-06, |
|
"loss": 0.0482, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 4.240282685512367, |
|
"grad_norm": 0.7510880827903748, |
|
"learning_rate": 4.558303886925795e-06, |
|
"loss": 0.0595, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.245330641090359, |
|
"grad_norm": 0.5140940546989441, |
|
"learning_rate": 4.52801615345785e-06, |
|
"loss": 0.0568, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 4.250378596668349, |
|
"grad_norm": 0.43089789152145386, |
|
"learning_rate": 4.497728419989904e-06, |
|
"loss": 0.058, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 4.25542655224634, |
|
"grad_norm": 0.6229716539382935, |
|
"learning_rate": 4.467440686521959e-06, |
|
"loss": 0.0538, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 4.260474507824331, |
|
"grad_norm": 0.6465341448783875, |
|
"learning_rate": 4.437152953054013e-06, |
|
"loss": 0.0544, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 4.265522463402322, |
|
"grad_norm": 0.42706695199012756, |
|
"learning_rate": 4.406865219586068e-06, |
|
"loss": 0.0562, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 4.270570418980313, |
|
"grad_norm": 0.5305337309837341, |
|
"learning_rate": 4.376577486118122e-06, |
|
"loss": 0.0567, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 4.275618374558304, |
|
"grad_norm": 0.7307097315788269, |
|
"learning_rate": 4.346289752650177e-06, |
|
"loss": 0.0486, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 4.280666330136295, |
|
"grad_norm": 0.5940870046615601, |
|
"learning_rate": 4.316002019182232e-06, |
|
"loss": 0.0514, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.4446733593940735, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.0545, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 4.290762241292277, |
|
"grad_norm": 0.9121294617652893, |
|
"learning_rate": 4.255426552246341e-06, |
|
"loss": 0.0557, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.295810196870267, |
|
"grad_norm": 0.568056583404541, |
|
"learning_rate": 4.225138818778395e-06, |
|
"loss": 0.0522, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 4.300858152448258, |
|
"grad_norm": 0.8788109421730042, |
|
"learning_rate": 4.194851085310449e-06, |
|
"loss": 0.0433, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 4.3059061080262495, |
|
"grad_norm": 0.7445030808448792, |
|
"learning_rate": 4.1645633518425035e-06, |
|
"loss": 0.05, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 4.310954063604241, |
|
"grad_norm": 0.8348667621612549, |
|
"learning_rate": 4.134275618374559e-06, |
|
"loss": 0.0584, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 4.316002019182231, |
|
"grad_norm": 0.462342232465744, |
|
"learning_rate": 4.103987884906613e-06, |
|
"loss": 0.0555, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 4.321049974760222, |
|
"grad_norm": 0.42785176634788513, |
|
"learning_rate": 4.073700151438667e-06, |
|
"loss": 0.0607, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 4.326097930338213, |
|
"grad_norm": 0.7172122597694397, |
|
"learning_rate": 4.043412417970722e-06, |
|
"loss": 0.0675, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 4.331145885916204, |
|
"grad_norm": 0.4495554566383362, |
|
"learning_rate": 4.013124684502776e-06, |
|
"loss": 0.0546, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 4.336193841494195, |
|
"grad_norm": 0.5083460807800293, |
|
"learning_rate": 3.982836951034831e-06, |
|
"loss": 0.06, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 4.341241797072186, |
|
"grad_norm": 0.4353145360946655, |
|
"learning_rate": 3.952549217566885e-06, |
|
"loss": 0.0535, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.3462897526501765, |
|
"grad_norm": 0.6741386651992798, |
|
"learning_rate": 3.92226148409894e-06, |
|
"loss": 0.0581, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 4.351337708228168, |
|
"grad_norm": 0.47798269987106323, |
|
"learning_rate": 3.891973750630995e-06, |
|
"loss": 0.0541, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 4.356385663806159, |
|
"grad_norm": 0.49109166860580444, |
|
"learning_rate": 3.861686017163049e-06, |
|
"loss": 0.0608, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 4.361433619384149, |
|
"grad_norm": 0.8310505747795105, |
|
"learning_rate": 3.831398283695104e-06, |
|
"loss": 0.0514, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 4.36648157496214, |
|
"grad_norm": 0.4586045742034912, |
|
"learning_rate": 3.801110550227158e-06, |
|
"loss": 0.0538, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 4.371529530540132, |
|
"grad_norm": 0.4350300133228302, |
|
"learning_rate": 3.7708228167592127e-06, |
|
"loss": 0.0526, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 4.376577486118122, |
|
"grad_norm": 0.6310685276985168, |
|
"learning_rate": 3.740535083291267e-06, |
|
"loss": 0.0597, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 4.381625441696113, |
|
"grad_norm": 0.6845548152923584, |
|
"learning_rate": 3.7102473498233217e-06, |
|
"loss": 0.0542, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 4.386673397274104, |
|
"grad_norm": 1.085631012916565, |
|
"learning_rate": 3.679959616355376e-06, |
|
"loss": 0.0601, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 4.391721352852095, |
|
"grad_norm": 0.6232538223266602, |
|
"learning_rate": 3.6496718828874303e-06, |
|
"loss": 0.0557, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.396769308430086, |
|
"grad_norm": 0.4568091630935669, |
|
"learning_rate": 3.6193841494194855e-06, |
|
"loss": 0.0494, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 4.401817264008077, |
|
"grad_norm": 0.7550612092018127, |
|
"learning_rate": 3.5890964159515398e-06, |
|
"loss": 0.0562, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 4.406865219586067, |
|
"grad_norm": 0.5380585789680481, |
|
"learning_rate": 3.5588086824835945e-06, |
|
"loss": 0.0521, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 4.411913175164059, |
|
"grad_norm": 0.42225027084350586, |
|
"learning_rate": 3.5285209490156488e-06, |
|
"loss": 0.0515, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 4.41696113074205, |
|
"grad_norm": 0.5831999778747559, |
|
"learning_rate": 3.498233215547703e-06, |
|
"loss": 0.0465, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 4.42200908632004, |
|
"grad_norm": 0.7943524718284607, |
|
"learning_rate": 3.4679454820797578e-06, |
|
"loss": 0.062, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 4.427057041898031, |
|
"grad_norm": 0.634747326374054, |
|
"learning_rate": 3.437657748611812e-06, |
|
"loss": 0.0496, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 4.4321049974760225, |
|
"grad_norm": 0.5734288692474365, |
|
"learning_rate": 3.407370015143867e-06, |
|
"loss": 0.0612, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 4.437152953054013, |
|
"grad_norm": 0.7079018354415894, |
|
"learning_rate": 3.3770822816759215e-06, |
|
"loss": 0.0578, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 4.442200908632004, |
|
"grad_norm": 0.44444698095321655, |
|
"learning_rate": 3.346794548207976e-06, |
|
"loss": 0.0559, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.447248864209995, |
|
"grad_norm": 0.7473122477531433, |
|
"learning_rate": 3.3165068147400305e-06, |
|
"loss": 0.0544, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 4.4522968197879855, |
|
"grad_norm": 0.6658338308334351, |
|
"learning_rate": 3.286219081272085e-06, |
|
"loss": 0.0552, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 4.457344775365977, |
|
"grad_norm": 0.48870500922203064, |
|
"learning_rate": 3.255931347804139e-06, |
|
"loss": 0.0566, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 4.462392730943968, |
|
"grad_norm": 0.6261917948722839, |
|
"learning_rate": 3.2256436143361943e-06, |
|
"loss": 0.0487, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 4.467440686521958, |
|
"grad_norm": 0.6060011982917786, |
|
"learning_rate": 3.1953558808682486e-06, |
|
"loss": 0.0514, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 4.4724886420999495, |
|
"grad_norm": 0.4858971834182739, |
|
"learning_rate": 3.165068147400303e-06, |
|
"loss": 0.05, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 4.477536597677941, |
|
"grad_norm": 0.6394979357719421, |
|
"learning_rate": 3.1347804139323576e-06, |
|
"loss": 0.0604, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 4.482584553255931, |
|
"grad_norm": 0.6840482950210571, |
|
"learning_rate": 3.104492680464412e-06, |
|
"loss": 0.0514, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 4.487632508833922, |
|
"grad_norm": 0.388715535402298, |
|
"learning_rate": 3.0742049469964666e-06, |
|
"loss": 0.0479, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 4.492680464411913, |
|
"grad_norm": 0.6516565084457397, |
|
"learning_rate": 3.043917213528521e-06, |
|
"loss": 0.0608, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.497728419989904, |
|
"grad_norm": 0.76282799243927, |
|
"learning_rate": 3.0136294800605756e-06, |
|
"loss": 0.0572, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 4.502776375567895, |
|
"grad_norm": 0.49448370933532715, |
|
"learning_rate": 2.9833417465926303e-06, |
|
"loss": 0.0575, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 4.507824331145886, |
|
"grad_norm": 0.5593730807304382, |
|
"learning_rate": 2.9530540131246846e-06, |
|
"loss": 0.0486, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 4.512872286723876, |
|
"grad_norm": 0.5773325562477112, |
|
"learning_rate": 2.922766279656739e-06, |
|
"loss": 0.0541, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 4.517920242301868, |
|
"grad_norm": 0.34630000591278076, |
|
"learning_rate": 2.8924785461887936e-06, |
|
"loss": 0.0606, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 4.522968197879859, |
|
"grad_norm": 0.5409483313560486, |
|
"learning_rate": 2.862190812720848e-06, |
|
"loss": 0.0589, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 4.52801615345785, |
|
"grad_norm": 0.5004202127456665, |
|
"learning_rate": 2.8319030792529026e-06, |
|
"loss": 0.0621, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 4.53306410903584, |
|
"grad_norm": 0.4979722797870636, |
|
"learning_rate": 2.8016153457849574e-06, |
|
"loss": 0.0537, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 4.5381120646138315, |
|
"grad_norm": 0.6733251214027405, |
|
"learning_rate": 2.7713276123170117e-06, |
|
"loss": 0.069, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 4.543160020191822, |
|
"grad_norm": 0.4152880609035492, |
|
"learning_rate": 2.7410398788490664e-06, |
|
"loss": 0.0565, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.548207975769813, |
|
"grad_norm": 0.6170037984848022, |
|
"learning_rate": 2.7107521453811207e-06, |
|
"loss": 0.0589, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 4.553255931347804, |
|
"grad_norm": 0.5258937478065491, |
|
"learning_rate": 2.680464411913175e-06, |
|
"loss": 0.0548, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 4.5583038869257955, |
|
"grad_norm": 0.534015417098999, |
|
"learning_rate": 2.6501766784452297e-06, |
|
"loss": 0.0447, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 4.563351842503786, |
|
"grad_norm": 0.86041259765625, |
|
"learning_rate": 2.6198889449772844e-06, |
|
"loss": 0.0578, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 4.568399798081777, |
|
"grad_norm": 0.8807480335235596, |
|
"learning_rate": 2.589601211509339e-06, |
|
"loss": 0.0479, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 4.573447753659767, |
|
"grad_norm": 0.6071127653121948, |
|
"learning_rate": 2.5593134780413934e-06, |
|
"loss": 0.0521, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 4.5784957092377585, |
|
"grad_norm": 0.9106950759887695, |
|
"learning_rate": 2.5290257445734477e-06, |
|
"loss": 0.056, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 4.58354366481575, |
|
"grad_norm": 0.6179044246673584, |
|
"learning_rate": 2.4987380111055024e-06, |
|
"loss": 0.0548, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 4.588591620393741, |
|
"grad_norm": 0.9295970797538757, |
|
"learning_rate": 2.4684502776375567e-06, |
|
"loss": 0.0626, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 4.593639575971731, |
|
"grad_norm": 0.4483726918697357, |
|
"learning_rate": 2.438162544169611e-06, |
|
"loss": 0.0531, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 4.598687531549722, |
|
"grad_norm": 0.38749760389328003, |
|
"learning_rate": 2.407874810701666e-06, |
|
"loss": 0.0514, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 4.603735487127714, |
|
"grad_norm": 0.7203320860862732, |
|
"learning_rate": 2.3775870772337205e-06, |
|
"loss": 0.0603, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 4.608783442705704, |
|
"grad_norm": 0.8010473251342773, |
|
"learning_rate": 2.347299343765775e-06, |
|
"loss": 0.053, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 4.613831398283695, |
|
"grad_norm": 0.7866964936256409, |
|
"learning_rate": 2.3170116102978295e-06, |
|
"loss": 0.0544, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 4.618879353861686, |
|
"grad_norm": 0.9333378076553345, |
|
"learning_rate": 2.2867238768298838e-06, |
|
"loss": 0.0472, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 4.623927309439677, |
|
"grad_norm": 0.5904621481895447, |
|
"learning_rate": 2.2564361433619385e-06, |
|
"loss": 0.0515, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 4.628975265017668, |
|
"grad_norm": 0.6837446093559265, |
|
"learning_rate": 2.2261484098939928e-06, |
|
"loss": 0.0566, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 4.634023220595659, |
|
"grad_norm": 0.5726220607757568, |
|
"learning_rate": 2.1958606764260475e-06, |
|
"loss": 0.0521, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 4.639071176173649, |
|
"grad_norm": 0.5920945405960083, |
|
"learning_rate": 2.1655729429581022e-06, |
|
"loss": 0.0527, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 4.644119131751641, |
|
"grad_norm": 0.5921088457107544, |
|
"learning_rate": 2.1352852094901565e-06, |
|
"loss": 0.0594, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.649167087329632, |
|
"grad_norm": 0.8026402592658997, |
|
"learning_rate": 2.1049974760222112e-06, |
|
"loss": 0.058, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 4.654215042907622, |
|
"grad_norm": 0.9913181066513062, |
|
"learning_rate": 2.0747097425542655e-06, |
|
"loss": 0.0591, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 4.659262998485613, |
|
"grad_norm": 0.675123393535614, |
|
"learning_rate": 2.04442200908632e-06, |
|
"loss": 0.0561, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 4.6643109540636045, |
|
"grad_norm": 0.5947641730308533, |
|
"learning_rate": 2.014134275618375e-06, |
|
"loss": 0.0486, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 4.669358909641595, |
|
"grad_norm": 0.5389765501022339, |
|
"learning_rate": 1.9838465421504293e-06, |
|
"loss": 0.0586, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.674406865219586, |
|
"grad_norm": 0.5905711054801941, |
|
"learning_rate": 1.9535588086824836e-06, |
|
"loss": 0.0523, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 4.679454820797577, |
|
"grad_norm": 0.36754655838012695, |
|
"learning_rate": 1.9232710752145383e-06, |
|
"loss": 0.0518, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 4.684502776375568, |
|
"grad_norm": 0.5583412647247314, |
|
"learning_rate": 1.8929833417465926e-06, |
|
"loss": 0.0536, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 4.689550731953559, |
|
"grad_norm": 0.4586925506591797, |
|
"learning_rate": 1.8626956082786473e-06, |
|
"loss": 0.0482, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 4.69459868753155, |
|
"grad_norm": 0.4932919442653656, |
|
"learning_rate": 1.8324078748107018e-06, |
|
"loss": 0.0484, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.69964664310954, |
|
"grad_norm": 0.3211473524570465, |
|
"learning_rate": 1.802120141342756e-06, |
|
"loss": 0.0522, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 4.7046945986875315, |
|
"grad_norm": 0.8603491187095642, |
|
"learning_rate": 1.7718324078748106e-06, |
|
"loss": 0.0585, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 4.709742554265523, |
|
"grad_norm": 0.7181740999221802, |
|
"learning_rate": 1.7415446744068653e-06, |
|
"loss": 0.0522, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 4.714790509843513, |
|
"grad_norm": 0.49415314197540283, |
|
"learning_rate": 1.7112569409389198e-06, |
|
"loss": 0.0417, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 4.719838465421504, |
|
"grad_norm": 0.758638322353363, |
|
"learning_rate": 1.6809692074709741e-06, |
|
"loss": 0.0608, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 4.724886420999495, |
|
"grad_norm": 0.6659887433052063, |
|
"learning_rate": 1.6506814740030288e-06, |
|
"loss": 0.0468, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 4.729934376577486, |
|
"grad_norm": 0.3270837962627411, |
|
"learning_rate": 1.6203937405350833e-06, |
|
"loss": 0.0602, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 4.734982332155477, |
|
"grad_norm": 0.6695159077644348, |
|
"learning_rate": 1.5901060070671379e-06, |
|
"loss": 0.0515, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 4.740030287733468, |
|
"grad_norm": 0.8143603205680847, |
|
"learning_rate": 1.5598182735991924e-06, |
|
"loss": 0.0613, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 4.745078243311459, |
|
"grad_norm": 0.6727936863899231, |
|
"learning_rate": 1.5295305401312469e-06, |
|
"loss": 0.0505, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.75012619888945, |
|
"grad_norm": 0.5365564823150635, |
|
"learning_rate": 1.4992428066633014e-06, |
|
"loss": 0.0512, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 4.755174154467441, |
|
"grad_norm": 0.5240725874900818, |
|
"learning_rate": 1.4689550731953559e-06, |
|
"loss": 0.0526, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 4.760222110045431, |
|
"grad_norm": 0.6975441575050354, |
|
"learning_rate": 1.4386673397274104e-06, |
|
"loss": 0.0592, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 4.765270065623422, |
|
"grad_norm": 0.44649407267570496, |
|
"learning_rate": 1.408379606259465e-06, |
|
"loss": 0.0597, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 4.770318021201414, |
|
"grad_norm": 0.598850429058075, |
|
"learning_rate": 1.3780918727915194e-06, |
|
"loss": 0.0606, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 4.775365976779405, |
|
"grad_norm": 0.57352614402771, |
|
"learning_rate": 1.3478041393235741e-06, |
|
"loss": 0.0502, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 4.780413932357395, |
|
"grad_norm": 0.7437055706977844, |
|
"learning_rate": 1.3175164058556284e-06, |
|
"loss": 0.0521, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 4.785461887935386, |
|
"grad_norm": 0.6993494629859924, |
|
"learning_rate": 1.287228672387683e-06, |
|
"loss": 0.0565, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 4.790509843513377, |
|
"grad_norm": 0.8067084550857544, |
|
"learning_rate": 1.2569409389197376e-06, |
|
"loss": 0.0575, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 4.795557799091368, |
|
"grad_norm": 0.5363942384719849, |
|
"learning_rate": 1.2266532054517921e-06, |
|
"loss": 0.058, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.800605754669359, |
|
"grad_norm": 0.8145700693130493, |
|
"learning_rate": 1.1963654719838464e-06, |
|
"loss": 0.0488, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 4.80565371024735, |
|
"grad_norm": 0.7701184153556824, |
|
"learning_rate": 1.166077738515901e-06, |
|
"loss": 0.0577, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 4.8107016658253405, |
|
"grad_norm": 0.5177111625671387, |
|
"learning_rate": 1.1357900050479557e-06, |
|
"loss": 0.0605, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 4.815749621403332, |
|
"grad_norm": 0.44751742482185364, |
|
"learning_rate": 1.1055022715800102e-06, |
|
"loss": 0.0565, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 4.820797576981323, |
|
"grad_norm": 0.37919309735298157, |
|
"learning_rate": 1.0752145381120645e-06, |
|
"loss": 0.0454, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 4.825845532559313, |
|
"grad_norm": 0.6037785410881042, |
|
"learning_rate": 1.0449268046441192e-06, |
|
"loss": 0.0606, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 4.8308934881373045, |
|
"grad_norm": 0.3584793508052826, |
|
"learning_rate": 1.0146390711761737e-06, |
|
"loss": 0.0503, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 4.835941443715296, |
|
"grad_norm": 0.49841853976249695, |
|
"learning_rate": 9.843513377082282e-07, |
|
"loss": 0.0434, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 4.840989399293286, |
|
"grad_norm": 0.5114769339561462, |
|
"learning_rate": 9.540636042402827e-07, |
|
"loss": 0.0535, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 4.846037354871277, |
|
"grad_norm": 0.5932824611663818, |
|
"learning_rate": 9.237758707723372e-07, |
|
"loss": 0.0547, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.851085310449268, |
|
"grad_norm": 0.6020333766937256, |
|
"learning_rate": 8.934881373043917e-07, |
|
"loss": 0.0597, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 4.856133266027259, |
|
"grad_norm": 0.721193790435791, |
|
"learning_rate": 8.632004038364462e-07, |
|
"loss": 0.0614, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 4.86118122160525, |
|
"grad_norm": 0.4858354926109314, |
|
"learning_rate": 8.329126703685008e-07, |
|
"loss": 0.0555, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 4.866229177183241, |
|
"grad_norm": 0.7863103747367859, |
|
"learning_rate": 8.026249369005552e-07, |
|
"loss": 0.0554, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 4.871277132761231, |
|
"grad_norm": 0.8363025784492493, |
|
"learning_rate": 7.723372034326099e-07, |
|
"loss": 0.0565, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 4.876325088339223, |
|
"grad_norm": 0.6137521266937256, |
|
"learning_rate": 7.420494699646643e-07, |
|
"loss": 0.0575, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 4.881373043917214, |
|
"grad_norm": 0.4781091511249542, |
|
"learning_rate": 7.117617364967189e-07, |
|
"loss": 0.0478, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 4.886420999495204, |
|
"grad_norm": 0.8294112086296082, |
|
"learning_rate": 6.814740030287734e-07, |
|
"loss": 0.0593, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 4.891468955073195, |
|
"grad_norm": 0.5780894160270691, |
|
"learning_rate": 6.511862695608279e-07, |
|
"loss": 0.0518, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 4.8965169106511865, |
|
"grad_norm": 0.4407060146331787, |
|
"learning_rate": 6.208985360928824e-07, |
|
"loss": 0.0522, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.901564866229177, |
|
"grad_norm": 0.4369337558746338, |
|
"learning_rate": 5.906108026249369e-07, |
|
"loss": 0.0522, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 4.906612821807168, |
|
"grad_norm": 0.8428089022636414, |
|
"learning_rate": 5.603230691569914e-07, |
|
"loss": 0.0468, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 4.911660777385159, |
|
"grad_norm": 0.6303294897079468, |
|
"learning_rate": 5.30035335689046e-07, |
|
"loss": 0.0577, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 4.91670873296315, |
|
"grad_norm": 0.4869242012500763, |
|
"learning_rate": 4.997476022211004e-07, |
|
"loss": 0.0472, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 4.921756688541141, |
|
"grad_norm": 0.5907611846923828, |
|
"learning_rate": 4.69459868753155e-07, |
|
"loss": 0.0455, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 4.926804644119132, |
|
"grad_norm": 0.6162139177322388, |
|
"learning_rate": 4.3917213528520954e-07, |
|
"loss": 0.0475, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 4.931852599697122, |
|
"grad_norm": 0.5222154259681702, |
|
"learning_rate": 4.0888440181726405e-07, |
|
"loss": 0.0513, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 4.9369005552751135, |
|
"grad_norm": 0.5132977366447449, |
|
"learning_rate": 3.7859666834931856e-07, |
|
"loss": 0.043, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 4.941948510853105, |
|
"grad_norm": 0.6620015501976013, |
|
"learning_rate": 3.4830893488137306e-07, |
|
"loss": 0.0598, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 4.946996466431095, |
|
"grad_norm": 0.7160341143608093, |
|
"learning_rate": 3.1802120141342757e-07, |
|
"loss": 0.0539, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.952044422009086, |
|
"grad_norm": 0.5954631567001343, |
|
"learning_rate": 2.8773346794548213e-07, |
|
"loss": 0.0581, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 4.957092377587077, |
|
"grad_norm": 1.0010461807250977, |
|
"learning_rate": 2.5744573447753664e-07, |
|
"loss": 0.0499, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 4.962140333165069, |
|
"grad_norm": 0.5768128633499146, |
|
"learning_rate": 2.2715800100959112e-07, |
|
"loss": 0.0562, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 4.967188288743059, |
|
"grad_norm": 0.6427052617073059, |
|
"learning_rate": 1.9687026754164563e-07, |
|
"loss": 0.0545, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 4.97223624432105, |
|
"grad_norm": 0.6932212114334106, |
|
"learning_rate": 1.6658253407370016e-07, |
|
"loss": 0.0575, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 4.9772841998990405, |
|
"grad_norm": 0.4219547510147095, |
|
"learning_rate": 1.3629480060575467e-07, |
|
"loss": 0.0491, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 4.982332155477032, |
|
"grad_norm": 0.5215485692024231, |
|
"learning_rate": 1.0600706713780919e-07, |
|
"loss": 0.0438, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 4.987380111055023, |
|
"grad_norm": 0.36851760745048523, |
|
"learning_rate": 7.57193336698637e-08, |
|
"loss": 0.052, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 4.992428066633014, |
|
"grad_norm": 0.5213483572006226, |
|
"learning_rate": 4.5431600201918226e-08, |
|
"loss": 0.0472, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 4.997476022211004, |
|
"grad_norm": 0.710657000541687, |
|
"learning_rate": 1.514386673397274e-08, |
|
"loss": 0.0582, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9705180789481339, |
|
"eval_loss": 0.03909851238131523, |
|
"eval_runtime": 579.4034, |
|
"eval_samples_per_second": 355.99, |
|
"eval_steps_per_second": 2.782, |
|
"step": 9905 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 9905, |
|
"total_flos": 9.82152667464321e+19, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.0755, |
|
"train_samples_per_second": 16783731.897, |
|
"train_steps_per_second": 131178.777 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9905, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.82152667464321e+19, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|