{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.698944636348022,
  "eval_steps": 500,
  "global_step": 27468,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007888191250469157,
      "grad_norm": 19.37966537475586,
      "learning_rate": 1.0157273918741808e-06,
      "loss": 8.7613,
      "step": 31
    },
    {
      "epoch": 0.0015776382500938314,
      "grad_norm": 13.922501564025879,
      "learning_rate": 2.0314547837483616e-06,
      "loss": 7.7444,
      "step": 62
    },
    {
      "epoch": 0.002366457375140747,
      "grad_norm": 11.746194839477539,
      "learning_rate": 3.0471821756225426e-06,
      "loss": 6.5314,
      "step": 93
    },
    {
      "epoch": 0.003155276500187663,
      "grad_norm": 16.49347686767578,
      "learning_rate": 4.062909567496723e-06,
      "loss": 5.3965,
      "step": 124
    },
    {
      "epoch": 0.003944095625234578,
      "grad_norm": 14.946576118469238,
      "learning_rate": 5.078636959370905e-06,
      "loss": 4.8936,
      "step": 155
    },
    {
      "epoch": 0.004732914750281494,
      "grad_norm": 16.985593795776367,
      "learning_rate": 6.094364351245085e-06,
      "loss": 4.5739,
      "step": 186
    },
    {
      "epoch": 0.00552173387532841,
      "grad_norm": 17.912887573242188,
      "learning_rate": 7.110091743119267e-06,
      "loss": 4.3466,
      "step": 217
    },
    {
      "epoch": 0.006310553000375326,
      "grad_norm": 15.746530532836914,
      "learning_rate": 8.125819134993446e-06,
      "loss": 4.1902,
      "step": 248
    },
    {
      "epoch": 0.007099372125422241,
      "grad_norm": 24.640979766845703,
      "learning_rate": 9.141546526867629e-06,
      "loss": 4.0289,
      "step": 279
    },
    {
      "epoch": 0.007888191250469157,
      "grad_norm": 14.921712875366211,
      "learning_rate": 1.015727391874181e-05,
      "loss": 3.8831,
      "step": 310
    },
    {
      "epoch": 0.008677010375516072,
      "grad_norm": 13.711713790893555,
      "learning_rate": 1.117300131061599e-05,
      "loss": 3.7542,
      "step": 341
    },
    {
      "epoch": 0.009465829500562987,
      "grad_norm": 12.796462059020996,
      "learning_rate": 1.218872870249017e-05,
      "loss": 3.6361,
      "step": 372
    },
    {
      "epoch": 0.010254648625609904,
      "grad_norm": 13.015061378479004,
      "learning_rate": 1.3204456094364351e-05,
      "loss": 3.5447,
      "step": 403
    },
    {
      "epoch": 0.01104346775065682,
      "grad_norm": 11.99329662322998,
      "learning_rate": 1.4220183486238533e-05,
      "loss": 3.4755,
      "step": 434
    },
    {
      "epoch": 0.011832286875703734,
      "grad_norm": 14.673354148864746,
      "learning_rate": 1.5235910878112714e-05,
      "loss": 3.384,
      "step": 465
    },
    {
      "epoch": 0.012621106000750651,
      "grad_norm": 14.919001579284668,
      "learning_rate": 1.6251638269986893e-05,
      "loss": 3.3096,
      "step": 496
    },
    {
      "epoch": 0.013409925125797566,
      "grad_norm": 9.896846771240234,
      "learning_rate": 1.7267365661861077e-05,
      "loss": 3.2666,
      "step": 527
    },
    {
      "epoch": 0.014198744250844482,
      "grad_norm": 10.563855171203613,
      "learning_rate": 1.8283093053735257e-05,
      "loss": 3.1977,
      "step": 558
    },
    {
      "epoch": 0.014987563375891397,
      "grad_norm": 8.470562934875488,
      "learning_rate": 1.9298820445609438e-05,
      "loss": 3.1561,
      "step": 589
    },
    {
      "epoch": 0.015776382500938314,
      "grad_norm": 7.783871173858643,
      "learning_rate": 2.031454783748362e-05,
      "loss": 3.1059,
      "step": 620
    },
    {
      "epoch": 0.016565201625985227,
      "grad_norm": 7.2594194412231445,
      "learning_rate": 2.13302752293578e-05,
      "loss": 3.0697,
      "step": 651
    },
    {
      "epoch": 0.017354020751032144,
      "grad_norm": 6.209031581878662,
      "learning_rate": 2.234600262123198e-05,
      "loss": 3.0443,
      "step": 682
    },
    {
      "epoch": 0.01814283987607906,
      "grad_norm": 8.110218048095703,
      "learning_rate": 2.336173001310616e-05,
      "loss": 2.9893,
      "step": 713
    },
    {
      "epoch": 0.018931659001125974,
      "grad_norm": 6.372434616088867,
      "learning_rate": 2.437745740498034e-05,
      "loss": 2.9522,
      "step": 744
    },
    {
      "epoch": 0.01972047812617289,
      "grad_norm": 6.396059989929199,
      "learning_rate": 2.5393184796854525e-05,
      "loss": 2.9286,
      "step": 775
    },
    {
      "epoch": 0.020509297251219808,
      "grad_norm": 5.576442718505859,
      "learning_rate": 2.6408912188728702e-05,
      "loss": 2.8986,
      "step": 806
    },
    {
      "epoch": 0.02129811637626672,
      "grad_norm": 7.455265045166016,
      "learning_rate": 2.7424639580602886e-05,
      "loss": 2.8346,
      "step": 837
    },
    {
      "epoch": 0.02208693550131364,
      "grad_norm": 5.682501792907715,
      "learning_rate": 2.8440366972477066e-05,
      "loss": 2.8033,
      "step": 868
    },
    {
      "epoch": 0.022875754626360555,
      "grad_norm": 6.197375297546387,
      "learning_rate": 2.9456094364351244e-05,
      "loss": 2.8146,
      "step": 899
    },
    {
      "epoch": 0.02366457375140747,
      "grad_norm": 6.124543190002441,
      "learning_rate": 3.0471821756225428e-05,
      "loss": 2.7633,
      "step": 930
    },
    {
      "epoch": 0.024453392876454386,
      "grad_norm": 5.52219820022583,
      "learning_rate": 3.148754914809961e-05,
      "loss": 2.7328,
      "step": 961
    },
    {
      "epoch": 0.025242212001501303,
      "grad_norm": 5.2266950607299805,
      "learning_rate": 3.2503276539973785e-05,
      "loss": 2.7086,
      "step": 992
    },
    {
      "epoch": 0.026031031126548216,
      "grad_norm": 4.860825538635254,
      "learning_rate": 3.351900393184797e-05,
      "loss": 2.6655,
      "step": 1023
    },
    {
      "epoch": 0.026819850251595133,
      "grad_norm": 4.891534805297852,
      "learning_rate": 3.453473132372215e-05,
      "loss": 2.6643,
      "step": 1054
    },
    {
      "epoch": 0.027608669376642046,
      "grad_norm": 4.9125566482543945,
      "learning_rate": 3.555045871559633e-05,
      "loss": 2.6084,
      "step": 1085
    },
    {
      "epoch": 0.028397488501688963,
      "grad_norm": 4.432997703552246,
      "learning_rate": 3.6566186107470514e-05,
      "loss": 2.5912,
      "step": 1116
    },
    {
      "epoch": 0.02918630762673588,
      "grad_norm": 4.265169143676758,
      "learning_rate": 3.7581913499344695e-05,
      "loss": 2.574,
      "step": 1147
    },
    {
      "epoch": 0.029975126751782794,
      "grad_norm": 4.731688022613525,
      "learning_rate": 3.8597640891218876e-05,
      "loss": 2.548,
      "step": 1178
    },
    {
      "epoch": 0.03076394587682971,
      "grad_norm": 4.037458419799805,
      "learning_rate": 3.9613368283093056e-05,
      "loss": 2.5411,
      "step": 1209
    },
    {
      "epoch": 0.03155276500187663,
      "grad_norm": 4.526784896850586,
      "learning_rate": 4.062909567496724e-05,
      "loss": 2.5111,
      "step": 1240
    },
    {
      "epoch": 0.03234158412692354,
      "grad_norm": 4.06175422668457,
      "learning_rate": 4.164482306684142e-05,
      "loss": 2.519,
      "step": 1271
    },
    {
      "epoch": 0.033130403251970454,
      "grad_norm": 4.599717617034912,
      "learning_rate": 4.26605504587156e-05,
      "loss": 2.4858,
      "step": 1302
    },
    {
      "epoch": 0.033919222377017375,
      "grad_norm": 3.97731876373291,
      "learning_rate": 4.367627785058978e-05,
      "loss": 2.4902,
      "step": 1333
    },
    {
      "epoch": 0.03470804150206429,
      "grad_norm": 3.732372522354126,
      "learning_rate": 4.469200524246396e-05,
      "loss": 2.4718,
      "step": 1364
    },
    {
      "epoch": 0.0354968606271112,
      "grad_norm": 3.583721160888672,
      "learning_rate": 4.570773263433814e-05,
      "loss": 2.423,
      "step": 1395
    },
    {
      "epoch": 0.03628567975215812,
      "grad_norm": 3.5558745861053467,
      "learning_rate": 4.672346002621232e-05,
      "loss": 2.4137,
      "step": 1426
    },
    {
      "epoch": 0.037074498877205035,
      "grad_norm": 3.757810115814209,
      "learning_rate": 4.77391874180865e-05,
      "loss": 2.4368,
      "step": 1457
    },
    {
      "epoch": 0.03786331800225195,
      "grad_norm": 3.6044111251831055,
      "learning_rate": 4.875491480996068e-05,
      "loss": 2.4129,
      "step": 1488
    },
    {
      "epoch": 0.03865213712729887,
      "grad_norm": 3.366729736328125,
      "learning_rate": 4.977064220183487e-05,
      "loss": 2.3889,
      "step": 1519
    },
    {
      "epoch": 0.03944095625234578,
      "grad_norm": 3.2331082820892334,
      "learning_rate": 4.9999915451558777e-05,
      "loss": 2.3745,
      "step": 1550
    },
    {
      "epoch": 0.040229775377392696,
      "grad_norm": 3.150716781616211,
      "learning_rate": 4.999955597496219e-05,
      "loss": 2.3588,
      "step": 1581
    },
    {
      "epoch": 0.041018594502439616,
      "grad_norm": 3.4237565994262695,
      "learning_rate": 4.9998914381774255e-05,
      "loss": 2.3639,
      "step": 1612
    },
    {
      "epoch": 0.04180741362748653,
      "grad_norm": 2.9677655696868896,
      "learning_rate": 4.999799067923527e-05,
      "loss": 2.3587,
      "step": 1643
    },
    {
      "epoch": 0.04259623275253344,
      "grad_norm": 3.1596009731292725,
      "learning_rate": 4.999678487776908e-05,
      "loss": 2.3365,
      "step": 1674
    },
    {
      "epoch": 0.04338505187758036,
      "grad_norm": 3.4485344886779785,
      "learning_rate": 4.9995296990983006e-05,
      "loss": 2.3353,
      "step": 1705
    },
    {
      "epoch": 0.04417387100262728,
      "grad_norm": 3.016425848007202,
      "learning_rate": 4.999352703566763e-05,
      "loss": 2.3172,
      "step": 1736
    },
    {
      "epoch": 0.04496269012767419,
      "grad_norm": 2.8494279384613037,
      "learning_rate": 4.999147503179668e-05,
      "loss": 2.3241,
      "step": 1767
    },
    {
      "epoch": 0.04575150925272111,
      "grad_norm": 2.7547197341918945,
      "learning_rate": 4.998914100252672e-05,
      "loss": 2.301,
      "step": 1798
    },
    {
      "epoch": 0.046540328377768024,
      "grad_norm": 2.7102956771850586,
      "learning_rate": 4.998652497419696e-05,
      "loss": 2.3046,
      "step": 1829
    },
    {
      "epoch": 0.04732914750281494,
      "grad_norm": 2.5211455821990967,
      "learning_rate": 4.9983626976328927e-05,
      "loss": 2.2794,
      "step": 1860
    },
    {
      "epoch": 0.04811796662786186,
      "grad_norm": 2.7271173000335693,
      "learning_rate": 4.998044704162613e-05,
      "loss": 2.2554,
      "step": 1891
    },
    {
      "epoch": 0.04890678575290877,
      "grad_norm": 2.850342273712158,
      "learning_rate": 4.9976985205973705e-05,
      "loss": 2.2722,
      "step": 1922
    },
    {
      "epoch": 0.049695604877955685,
      "grad_norm": 2.6712722778320312,
      "learning_rate": 4.997324150843799e-05,
      "loss": 2.2412,
      "step": 1953
    },
    {
      "epoch": 0.050484424003002605,
      "grad_norm": 2.6086056232452393,
      "learning_rate": 4.99692159912661e-05,
      "loss": 2.2724,
      "step": 1984
    },
    {
      "epoch": 0.05127324312804952,
      "grad_norm": 2.495509147644043,
      "learning_rate": 4.996490869988546e-05,
      "loss": 2.2588,
      "step": 2015
    },
    {
      "epoch": 0.05206206225309643,
      "grad_norm": 2.671813488006592,
      "learning_rate": 4.996031968290326e-05,
      "loss": 2.2334,
      "step": 2046
    },
    {
      "epoch": 0.052850881378143345,
      "grad_norm": 2.6381373405456543,
      "learning_rate": 4.995544899210594e-05,
      "loss": 2.2361,
      "step": 2077
    },
    {
      "epoch": 0.053639700503190266,
      "grad_norm": 2.54028058052063,
      "learning_rate": 4.9950296682458583e-05,
      "loss": 2.2216,
      "step": 2108
    },
    {
      "epoch": 0.05442851962823718,
      "grad_norm": 2.67765212059021,
      "learning_rate": 4.994486281210429e-05,
      "loss": 2.2064,
      "step": 2139
    },
    {
      "epoch": 0.05521733875328409,
      "grad_norm": 2.570333957672119,
      "learning_rate": 4.9939147442363566e-05,
      "loss": 2.2334,
      "step": 2170
    },
    {
      "epoch": 0.05600615787833101,
      "grad_norm": 2.4661099910736084,
      "learning_rate": 4.9933150637733574e-05,
      "loss": 2.1975,
      "step": 2201
    },
    {
      "epoch": 0.056794977003377926,
      "grad_norm": 2.5485525131225586,
      "learning_rate": 4.992687246588743e-05,
      "loss": 2.1883,
      "step": 2232
    },
    {
      "epoch": 0.05758379612842484,
      "grad_norm": 2.471689462661743,
      "learning_rate": 4.992031299767347e-05,
      "loss": 2.1976,
      "step": 2263
    },
    {
      "epoch": 0.05837261525347176,
      "grad_norm": 2.4743897914886475,
      "learning_rate": 4.9913472307114386e-05,
      "loss": 2.175,
      "step": 2294
    },
    {
      "epoch": 0.059161434378518674,
      "grad_norm": 2.445451259613037,
      "learning_rate": 4.9906350471406446e-05,
      "loss": 2.1683,
      "step": 2325
    },
    {
      "epoch": 0.05995025350356559,
      "grad_norm": 2.406235456466675,
      "learning_rate": 4.989894757091861e-05,
      "loss": 2.1685,
      "step": 2356
    },
    {
      "epoch": 0.06073907262861251,
      "grad_norm": 2.2473762035369873,
      "learning_rate": 4.989126368919158e-05,
      "loss": 2.191,
      "step": 2387
    },
    {
      "epoch": 0.06152789175365942,
      "grad_norm": 2.3035123348236084,
      "learning_rate": 4.988329891293693e-05,
      "loss": 2.1702,
      "step": 2418
    },
    {
      "epoch": 0.062316710878706334,
      "grad_norm": 2.3511545658111572,
      "learning_rate": 4.987505333203608e-05,
      "loss": 2.1565,
      "step": 2449
    },
    {
      "epoch": 0.06310553000375325,
      "grad_norm": 2.2300775051116943,
      "learning_rate": 4.9866527039539276e-05,
      "loss": 2.1558,
      "step": 2480
    },
    {
      "epoch": 0.06389434912880017,
      "grad_norm": 2.3471133708953857,
      "learning_rate": 4.9857720131664594e-05,
      "loss": 2.1516,
      "step": 2511
    },
    {
      "epoch": 0.06468316825384708,
      "grad_norm": 2.3770556449890137,
      "learning_rate": 4.9848632707796773e-05,
      "loss": 2.1384,
      "step": 2542
    },
    {
      "epoch": 0.065471987378894,
      "grad_norm": 2.235302209854126,
      "learning_rate": 4.9839264870486155e-05,
      "loss": 2.1287,
      "step": 2573
    },
    {
      "epoch": 0.06626080650394091,
      "grad_norm": 2.3938326835632324,
      "learning_rate": 4.9829616725447526e-05,
      "loss": 2.1387,
      "step": 2604
    },
    {
      "epoch": 0.06704962562898784,
      "grad_norm": 2.177912712097168,
      "learning_rate": 4.981968838155888e-05,
      "loss": 2.1126,
      "step": 2635
    },
    {
      "epoch": 0.06783844475403475,
      "grad_norm": 2.2535035610198975,
      "learning_rate": 4.980947995086024e-05,
      "loss": 2.126,
      "step": 2666
    },
    {
      "epoch": 0.06862726387908166,
      "grad_norm": 2.14459228515625,
      "learning_rate": 4.979899154855234e-05,
      "loss": 2.1334,
      "step": 2697
    },
    {
      "epoch": 0.06941608300412858,
      "grad_norm": 2.156930446624756,
      "learning_rate": 4.9788223292995386e-05,
      "loss": 2.1121,
      "step": 2728
    },
    {
      "epoch": 0.07020490212917549,
      "grad_norm": 2.344521999359131,
      "learning_rate": 4.977717530570768e-05,
      "loss": 2.123,
      "step": 2759
    },
    {
      "epoch": 0.0709937212542224,
      "grad_norm": 2.1548428535461426,
      "learning_rate": 4.976584771136425e-05,
      "loss": 2.113,
      "step": 2790
    },
    {
      "epoch": 0.07178254037926933,
      "grad_norm": 2.2467427253723145,
      "learning_rate": 4.975424063779547e-05,
      "loss": 2.1048,
      "step": 2821
    },
    {
      "epoch": 0.07257135950431624,
      "grad_norm": 2.601376533508301,
      "learning_rate": 4.974235421598557e-05,
      "loss": 2.1144,
      "step": 2852
    },
    {
      "epoch": 0.07336017862936316,
      "grad_norm": 2.143150568008423,
      "learning_rate": 4.973018858007122e-05,
      "loss": 2.0793,
      "step": 2883
    },
    {
      "epoch": 0.07414899775441007,
      "grad_norm": 2.1503472328186035,
      "learning_rate": 4.9717743867339963e-05,
      "loss": 2.0996,
      "step": 2914
    },
    {
      "epoch": 0.07493781687945698,
      "grad_norm": 2.1850991249084473,
      "learning_rate": 4.9705020218228695e-05,
      "loss": 2.0912,
      "step": 2945
    },
    {
      "epoch": 0.0757266360045039,
      "grad_norm": 2.207058906555176,
      "learning_rate": 4.969201777632205e-05,
      "loss": 2.104,
      "step": 2976
    },
    {
      "epoch": 0.07651545512955082,
      "grad_norm": 2.1132214069366455,
      "learning_rate": 4.9678736688350846e-05,
      "loss": 2.0857,
      "step": 3007
    },
    {
      "epoch": 0.07730427425459774,
      "grad_norm": 4.404266834259033,
      "learning_rate": 4.966517710419033e-05,
      "loss": 2.0888,
      "step": 3038
    },
    {
      "epoch": 0.07809309337964465,
      "grad_norm": 2.0443875789642334,
      "learning_rate": 4.965133917685858e-05,
      "loss": 2.0688,
      "step": 3069
    },
    {
      "epoch": 0.07888191250469156,
      "grad_norm": 1.9510438442230225,
      "learning_rate": 4.9637223062514714e-05,
      "loss": 2.0891,
      "step": 3100
    },
    {
      "epoch": 0.07967073162973848,
      "grad_norm": 2.089725971221924,
      "learning_rate": 4.962282892045718e-05,
      "loss": 2.0949,
      "step": 3131
    },
    {
      "epoch": 0.08045955075478539,
      "grad_norm": 2.1167550086975098,
      "learning_rate": 4.9608156913121904e-05,
      "loss": 2.0911,
      "step": 3162
    },
    {
      "epoch": 0.08124836987983232,
      "grad_norm": 2.2896945476531982,
      "learning_rate": 4.959320720608049e-05,
      "loss": 2.0709,
      "step": 3193
    },
    {
      "epoch": 0.08203718900487923,
      "grad_norm": 2.0464134216308594,
      "learning_rate": 4.9577979968038354e-05,
      "loss": 2.0743,
      "step": 3224
    },
    {
      "epoch": 0.08282600812992615,
      "grad_norm": 2.0745983123779297,
      "learning_rate": 4.956247537083282e-05,
      "loss": 2.0644,
      "step": 3255
    },
    {
      "epoch": 0.08361482725497306,
      "grad_norm": 2.1493799686431885,
      "learning_rate": 4.9546693589431145e-05,
      "loss": 2.0651,
      "step": 3286
    },
    {
      "epoch": 0.08440364638001997,
      "grad_norm": 2.07473087310791,
      "learning_rate": 4.9530634801928595e-05,
      "loss": 2.0354,
      "step": 3317
    },
    {
      "epoch": 0.08519246550506689,
      "grad_norm": 1.962320327758789,
      "learning_rate": 4.9514299189546395e-05,
      "loss": 2.0664,
      "step": 3348
    },
    {
      "epoch": 0.0859812846301138,
      "grad_norm": 1.9907017946243286,
      "learning_rate": 4.949768693662973e-05,
      "loss": 2.0772,
      "step": 3379
    },
    {
      "epoch": 0.08677010375516073,
      "grad_norm": 2.2022948265075684,
      "learning_rate": 4.948079823064559e-05,
      "loss": 2.0629,
      "step": 3410
    },
    {
      "epoch": 0.08755892288020764,
      "grad_norm": 1.9094113111495972,
      "learning_rate": 4.946363326218074e-05,
      "loss": 2.0516,
      "step": 3441
    },
    {
      "epoch": 0.08834774200525455,
      "grad_norm": 2.16923189163208,
      "learning_rate": 4.9446192224939525e-05,
      "loss": 2.0712,
      "step": 3472
    },
    {
      "epoch": 0.08913656113030147,
      "grad_norm": 2.2181923389434814,
      "learning_rate": 4.942847531574167e-05,
      "loss": 2.0476,
      "step": 3503
    },
    {
      "epoch": 0.08992538025534838,
      "grad_norm": 2.0701868534088135,
      "learning_rate": 4.941048273452008e-05,
      "loss": 2.0662,
      "step": 3534
    },
    {
      "epoch": 0.0907141993803953,
      "grad_norm": 2.1713969707489014,
      "learning_rate": 4.9392214684318605e-05,
      "loss": 2.0316,
      "step": 3565
    },
    {
      "epoch": 0.09150301850544222,
      "grad_norm": 1.8713674545288086,
      "learning_rate": 4.93736713712897e-05,
      "loss": 2.0276,
      "step": 3596
    },
    {
      "epoch": 0.09229183763048913,
      "grad_norm": 1.957153081893921,
      "learning_rate": 4.9354853004692124e-05,
      "loss": 2.0423,
      "step": 3627
    },
    {
      "epoch": 0.09308065675553605,
      "grad_norm": 2.025723457336426,
      "learning_rate": 4.93357597968886e-05,
      "loss": 2.0361,
      "step": 3658
    },
    {
      "epoch": 0.09386947588058296,
      "grad_norm": 1.9253807067871094,
      "learning_rate": 4.931639196334338e-05,
      "loss": 2.0254,
      "step": 3689
    },
    {
      "epoch": 0.09465829500562987,
      "grad_norm": 2.1453866958618164,
      "learning_rate": 4.9296749722619826e-05,
      "loss": 2.0434,
      "step": 3720
    },
    {
      "epoch": 0.09544711413067679,
      "grad_norm": 2.116746187210083,
      "learning_rate": 4.9276833296377966e-05,
      "loss": 2.0535,
      "step": 3751
    },
    {
      "epoch": 0.09623593325572372,
      "grad_norm": 2.060053825378418,
      "learning_rate": 4.925664290937196e-05,
      "loss": 2.0162,
      "step": 3782
    },
    {
      "epoch": 0.09702475238077063,
      "grad_norm": 2.033005952835083,
      "learning_rate": 4.9236178789447576e-05,
      "loss": 2.0377,
      "step": 3813
    },
    {
      "epoch": 0.09781357150581754,
      "grad_norm": 1.930992841720581,
      "learning_rate": 4.921544116753962e-05,
      "loss": 2.0091,
      "step": 3844
    },
    {
      "epoch": 0.09860239063086446,
      "grad_norm": 1.987481951713562,
      "learning_rate": 4.919443027766935e-05,
      "loss": 2.0191,
      "step": 3875
    },
    {
      "epoch": 0.09939120975591137,
      "grad_norm": 2.1110246181488037,
      "learning_rate": 4.91731463569418e-05,
      "loss": 2.015,
      "step": 3906
    },
    {
      "epoch": 0.10018002888095828,
      "grad_norm": 2.009760618209839,
      "learning_rate": 4.915158964554312e-05,
      "loss": 2.0196,
      "step": 3937
    },
    {
      "epoch": 0.10096884800600521,
      "grad_norm": 1.9978469610214233,
      "learning_rate": 4.912976038673786e-05,
      "loss": 2.0095,
      "step": 3968
    },
    {
      "epoch": 0.10175766713105212,
      "grad_norm": 1.9885358810424805,
      "learning_rate": 4.9107658826866254e-05,
      "loss": 2.0148,
      "step": 3999
    },
    {
      "epoch": 0.10254648625609904,
      "grad_norm": 1.9853264093399048,
      "learning_rate": 4.908528521534139e-05,
      "loss": 2.014,
      "step": 4030
    },
    {
      "epoch": 0.10333530538114595,
      "grad_norm": 1.9636098146438599,
      "learning_rate": 4.906263980464644e-05,
      "loss": 2.0197,
      "step": 4061
    },
    {
      "epoch": 0.10412412450619286,
      "grad_norm": 1.9954477548599243,
      "learning_rate": 4.903972285033178e-05,
      "loss": 1.9974,
      "step": 4092
    },
    {
      "epoch": 0.10491294363123978,
      "grad_norm": 1.9644993543624878,
      "learning_rate": 4.901653461101213e-05,
      "loss": 2.0035,
      "step": 4123
    },
    {
      "epoch": 0.10570176275628669,
      "grad_norm": 1.9775594472885132,
      "learning_rate": 4.8993075348363626e-05,
      "loss": 2.0108,
      "step": 4154
    },
    {
      "epoch": 0.10649058188133362,
      "grad_norm": 1.9478306770324707,
      "learning_rate": 4.896934532712084e-05,
      "loss": 1.992,
      "step": 4185
    },
    {
      "epoch": 0.10727940100638053,
      "grad_norm": 2.2822272777557373,
      "learning_rate": 4.8945344815073846e-05,
      "loss": 2.0038,
      "step": 4216
    },
    {
      "epoch": 0.10806822013142744,
      "grad_norm": 1.9325686693191528,
      "learning_rate": 4.892107408306516e-05,
      "loss": 1.9914,
      "step": 4247
    },
    {
      "epoch": 0.10885703925647436,
      "grad_norm": 1.9367748498916626,
      "learning_rate": 4.889653340498669e-05,
      "loss": 2.0012,
      "step": 4278
    },
    {
      "epoch": 0.10964585838152127,
      "grad_norm": 1.9141064882278442,
      "learning_rate": 4.8871723057776664e-05,
      "loss": 2.0192,
      "step": 4309
    },
    {
      "epoch": 0.11043467750656819,
      "grad_norm": 2.1826133728027344,
      "learning_rate": 4.8846643321416476e-05,
      "loss": 2.0014,
      "step": 4340
    },
    {
      "epoch": 0.11122349663161511,
      "grad_norm": 1.9631683826446533,
      "learning_rate": 4.882129447892753e-05,
      "loss": 1.9848,
      "step": 4371
    },
    {
      "epoch": 0.11201231575666203,
      "grad_norm": 1.969335913658142,
      "learning_rate": 4.8795676816368076e-05,
      "loss": 1.9832,
      "step": 4402
    },
    {
      "epoch": 0.11280113488170894,
      "grad_norm": 1.8996473550796509,
      "learning_rate": 4.876979062282995e-05,
      "loss": 1.9866,
      "step": 4433
    },
    {
      "epoch": 0.11358995400675585,
      "grad_norm": 1.9162741899490356,
      "learning_rate": 4.8743636190435325e-05,
      "loss": 1.9985,
      "step": 4464
    },
    {
      "epoch": 0.11437877313180277,
      "grad_norm": 1.9941459894180298,
      "learning_rate": 4.871721381433344e-05,
      "loss": 1.9831,
      "step": 4495
    },
    {
      "epoch": 0.11516759225684968,
      "grad_norm": 1.7893972396850586,
      "learning_rate": 4.869052379269719e-05,
      "loss": 1.9965,
      "step": 4526
    },
    {
      "epoch": 0.1159564113818966,
      "grad_norm": 1.8150454759597778,
      "learning_rate": 4.866356642671985e-05,
      "loss": 1.9814,
      "step": 4557
    },
    {
      "epoch": 0.11674523050694352,
      "grad_norm": 1.9319466352462769,
      "learning_rate": 4.8636342020611634e-05,
      "loss": 1.9761,
      "step": 4588
    },
    {
      "epoch": 0.11753404963199043,
      "grad_norm": 1.7858814001083374,
      "learning_rate": 4.860885088159626e-05,
      "loss": 1.9912,
      "step": 4619
    },
    {
      "epoch": 0.11832286875703735,
      "grad_norm": 1.8554847240447998,
      "learning_rate": 4.858109331990751e-05,
      "loss": 1.9709,
      "step": 4650
    },
    {
      "epoch": 0.11911168788208426,
      "grad_norm": 1.8804956674575806,
      "learning_rate": 4.855306964878567e-05,
      "loss": 2.0065,
      "step": 4681
    },
    {
      "epoch": 0.11990050700713117,
      "grad_norm": 1.8334413766860962,
      "learning_rate": 4.8524780184474084e-05,
      "loss": 1.9883,
      "step": 4712
    },
    {
      "epoch": 0.1206893261321781,
      "grad_norm": 2.0563790798187256,
      "learning_rate": 4.8496225246215496e-05,
      "loss": 1.9805,
      "step": 4743
    },
    {
      "epoch": 0.12147814525722501,
      "grad_norm": 1.7843685150146484,
      "learning_rate": 4.8467405156248505e-05,
      "loss": 1.9732,
      "step": 4774
    },
    {
      "epoch": 0.12226696438227193,
      "grad_norm": 1.8490878343582153,
      "learning_rate": 4.843832023980392e-05,
      "loss": 1.9955,
      "step": 4805
    },
    {
      "epoch": 0.12305578350731884,
      "grad_norm": 1.6674724817276,
      "learning_rate": 4.840897082510106e-05,
      "loss": 1.9837,
      "step": 4836
    },
    {
      "epoch": 0.12384460263236576,
      "grad_norm": 1.7055600881576538,
      "learning_rate": 4.8379357243344084e-05,
      "loss": 1.9782,
      "step": 4867
    },
    {
      "epoch": 0.12463342175741267,
      "grad_norm": 1.7836189270019531,
      "learning_rate": 4.8349479828718236e-05,
      "loss": 1.9696,
      "step": 4898
    },
    {
      "epoch": 0.1254222408824596,
      "grad_norm": 1.8213683366775513,
      "learning_rate": 4.8319338918386075e-05,
      "loss": 1.9689,
      "step": 4929
    },
    {
      "epoch": 0.1262110600075065,
      "grad_norm": 1.7552939653396606,
      "learning_rate": 4.828893485248369e-05,
      "loss": 2.0059,
      "step": 4960
    },
    {
      "epoch": 0.12699987913255342,
      "grad_norm": 1.7021842002868652,
      "learning_rate": 4.825826797411682e-05,
      "loss": 1.966,
      "step": 4991
    },
    {
      "epoch": 0.12778869825760034,
      "grad_norm": 2.696760416030884,
      "learning_rate": 4.822733862935702e-05,
      "loss": 1.9909,
      "step": 5022
    },
    {
      "epoch": 0.12857751738264725,
      "grad_norm": 1.8321341276168823,
      "learning_rate": 4.819614716723775e-05,
      "loss": 1.967,
      "step": 5053
    },
    {
      "epoch": 0.12936633650769416,
      "grad_norm": 2.1158053874969482,
      "learning_rate": 4.8164693939750425e-05,
      "loss": 1.9692,
      "step": 5084
    },
    {
      "epoch": 0.13015515563274108,
      "grad_norm": 1.9149221181869507,
      "learning_rate": 4.813297930184042e-05,
      "loss": 1.9676,
      "step": 5115
    },
    {
      "epoch": 0.130943974757788,
      "grad_norm": 1.7447597980499268,
      "learning_rate": 4.810100361140314e-05,
      "loss": 1.9538,
      "step": 5146
    },
    {
      "epoch": 0.1317327938828349,
      "grad_norm": 1.7692095041275024,
      "learning_rate": 4.8068767229279885e-05,
      "loss": 1.9612,
      "step": 5177
    },
    {
      "epoch": 0.13252161300788182,
      "grad_norm": 1.8180122375488281,
      "learning_rate": 4.8036270519253854e-05,
      "loss": 1.9679,
      "step": 5208
    },
    {
      "epoch": 0.13331043213292876,
      "grad_norm": 1.7397531270980835,
      "learning_rate": 4.8003513848046e-05,
      "loss": 1.9483,
      "step": 5239
    },
    {
      "epoch": 0.13409925125797567,
      "grad_norm": 1.773038387298584,
      "learning_rate": 4.79704975853109e-05,
      "loss": 1.9706,
      "step": 5270
    },
    {
      "epoch": 0.13488807038302258,
      "grad_norm": 1.846821904182434,
      "learning_rate": 4.793722210363262e-05,
      "loss": 1.9557,
      "step": 5301
    },
    {
      "epoch": 0.1356768895080695,
      "grad_norm": 1.8578094244003296,
      "learning_rate": 4.7903687778520414e-05,
      "loss": 1.9613,
      "step": 5332
    },
    {
      "epoch": 0.1364657086331164,
      "grad_norm": 1.6778779029846191,
      "learning_rate": 4.7869894988404593e-05,
      "loss": 1.9481,
      "step": 5363
    },
    {
      "epoch": 0.13725452775816332,
      "grad_norm": 1.8821789026260376,
      "learning_rate": 4.783584411463221e-05,
      "loss": 1.9475,
      "step": 5394
    },
    {
      "epoch": 0.13804334688321024,
      "grad_norm": 1.7182306051254272,
      "learning_rate": 4.780153554146274e-05,
      "loss": 1.9541,
      "step": 5425
    },
    {
      "epoch": 0.13883216600825715,
      "grad_norm": 1.9063465595245361,
      "learning_rate": 4.7766969656063766e-05,
      "loss": 1.9475,
      "step": 5456
    },
    {
      "epoch": 0.13962098513330407,
      "grad_norm": 1.8275973796844482,
      "learning_rate": 4.773214684850662e-05,
      "loss": 1.9672,
      "step": 5487
    },
    {
      "epoch": 0.14040980425835098,
      "grad_norm": 1.798607349395752,
      "learning_rate": 4.769706751176193e-05,
      "loss": 1.9603,
      "step": 5518
    },
    {
      "epoch": 0.1411986233833979,
      "grad_norm": 1.9662116765975952,
      "learning_rate": 4.7661732041695264e-05,
      "loss": 1.959,
      "step": 5549
    },
    {
      "epoch": 0.1419874425084448,
      "grad_norm": 1.9336684942245483,
      "learning_rate": 4.762614083706258e-05,
      "loss": 1.9427,
      "step": 5580
    },
    {
      "epoch": 0.14277626163349175,
      "grad_norm": 1.68619704246521,
      "learning_rate": 4.759029429950581e-05,
      "loss": 1.9459,
      "step": 5611
    },
    {
      "epoch": 0.14356508075853866,
      "grad_norm": 1.9060170650482178,
      "learning_rate": 4.7554192833548235e-05,
      "loss": 1.9522,
      "step": 5642
    },
    {
      "epoch": 0.14435389988358557,
      "grad_norm": 1.7184319496154785,
      "learning_rate": 4.751783684659e-05,
      "loss": 1.9253,
      "step": 5673
    },
    {
      "epoch": 0.1451427190086325,
      "grad_norm": 1.7833524942398071,
      "learning_rate": 4.748122674890348e-05,
      "loss": 1.9484,
      "step": 5704
    },
    {
      "epoch": 0.1459315381336794,
      "grad_norm": 1.8405711650848389,
      "learning_rate": 4.7444362953628654e-05,
      "loss": 1.9576,
      "step": 5735
    },
    {
      "epoch": 0.1467203572587263,
      "grad_norm": 1.7746304273605347,
      "learning_rate": 4.7407245876768424e-05,
      "loss": 1.9549,
      "step": 5766
    },
    {
      "epoch": 0.14750917638377323,
      "grad_norm": 1.6882883310317993,
      "learning_rate": 4.736987593718397e-05,
      "loss": 1.9407,
      "step": 5797
    },
    {
      "epoch": 0.14829799550882014,
      "grad_norm": 1.6760326623916626,
      "learning_rate": 4.733225355658999e-05,
      "loss": 1.9358,
      "step": 5828
    },
    {
      "epoch": 0.14908681463386705,
      "grad_norm": 1.6274425983428955,
      "learning_rate": 4.7294379159549926e-05,
      "loss": 1.929,
      "step": 5859
    },
    {
      "epoch": 0.14987563375891397,
      "grad_norm": 1.8570448160171509,
      "learning_rate": 4.725625317347119e-05,
      "loss": 1.926,
      "step": 5890
    },
    {
      "epoch": 0.15066445288396088,
      "grad_norm": 1.754744052886963,
      "learning_rate": 4.7217876028600374e-05,
      "loss": 1.948,
      "step": 5921
    },
    {
      "epoch": 0.1514532720090078,
      "grad_norm": 1.7100951671600342,
      "learning_rate": 4.717924815801832e-05,
      "loss": 1.9527,
      "step": 5952
    },
    {
      "epoch": 0.1522420911340547,
      "grad_norm": 1.731489658355713,
      "learning_rate": 4.714036999763532e-05,
      "loss": 1.933,
      "step": 5983
    },
    {
      "epoch": 0.15303091025910165,
      "grad_norm": 1.7440745830535889,
      "learning_rate": 4.7101241986186116e-05,
      "loss": 1.9214,
      "step": 6014
    },
    {
      "epoch": 0.15381972938414856,
      "grad_norm": 1.7062227725982666,
      "learning_rate": 4.7061864565225e-05,
      "loss": 1.9381,
      "step": 6045
    },
    {
      "epoch": 0.15460854850919548,
      "grad_norm": 1.6533842086791992,
      "learning_rate": 4.702223817912081e-05,
      "loss": 1.9097,
      "step": 6076
    },
    {
      "epoch": 0.1553973676342424,
      "grad_norm": 1.8380335569381714,
      "learning_rate": 4.698236327505195e-05,
      "loss": 1.9357,
      "step": 6107
    },
    {
      "epoch": 0.1561861867592893,
      "grad_norm": 1.7769367694854736,
      "learning_rate": 4.694224030300127e-05,
      "loss": 1.9176,
      "step": 6138
    },
    {
      "epoch": 0.15697500588433622,
      "grad_norm": 1.697293758392334,
      "learning_rate": 4.690186971575107e-05,
      "loss": 1.9242,
      "step": 6169
    },
    {
      "epoch": 0.15776382500938313,
      "grad_norm": 1.6948884725570679,
      "learning_rate": 4.6861251968877916e-05,
      "loss": 1.9277,
      "step": 6200
    },
    {
      "epoch": 0.15855264413443004,
      "grad_norm": 1.8506945371627808,
      "learning_rate": 4.68203875207476e-05,
      "loss": 1.9413,
      "step": 6231
    },
    {
      "epoch": 0.15934146325947696,
      "grad_norm": 1.6183403730392456,
      "learning_rate": 4.677927683250983e-05,
      "loss": 1.929,
      "step": 6262
    },
    {
      "epoch": 0.16013028238452387,
      "grad_norm": 1.7411370277404785,
      "learning_rate": 4.6737920368093156e-05,
      "loss": 1.9272,
      "step": 6293
    },
    {
      "epoch": 0.16091910150957078,
      "grad_norm": 1.6963975429534912,
      "learning_rate": 4.669631859419965e-05,
      "loss": 1.9183,
      "step": 6324
    },
    {
      "epoch": 0.1617079206346177,
      "grad_norm": 1.664981484413147,
      "learning_rate": 4.6654471980299676e-05,
      "loss": 1.9358,
      "step": 6355
    },
    {
      "epoch": 0.16249673975966464,
      "grad_norm": 1.8351246118545532,
      "learning_rate": 4.661238099862658e-05,
      "loss": 1.9126,
      "step": 6386
    },
    {
      "epoch": 0.16328555888471155,
      "grad_norm": 1.8973945379257202,
      "learning_rate": 4.657004612417138e-05,
      "loss": 1.9171,
      "step": 6417
    },
    {
      "epoch": 0.16407437800975846,
      "grad_norm": 1.7961071729660034,
      "learning_rate": 4.6527467834677374e-05,
      "loss": 1.9095,
      "step": 6448
    },
    {
      "epoch": 0.16486319713480538,
      "grad_norm": 1.73550283908844,
      "learning_rate": 4.648464661063478e-05,
      "loss": 1.924,
      "step": 6479
    },
    {
      "epoch": 0.1656520162598523,
      "grad_norm": 1.77008056640625,
      "learning_rate": 4.6441582935275264e-05,
      "loss": 1.9338,
      "step": 6510
    },
    {
      "epoch": 0.1664408353848992,
      "grad_norm": 1.7121846675872803,
      "learning_rate": 4.6398277294566586e-05,
      "loss": 1.9094,
      "step": 6541
    },
    {
      "epoch": 0.16722965450994612,
      "grad_norm": 1.6560605764389038,
      "learning_rate": 4.6354730177207e-05,
      "loss": 1.9195,
      "step": 6572
    },
    {
      "epoch": 0.16801847363499303,
      "grad_norm": 1.7044239044189453,
      "learning_rate": 4.6310942074619787e-05,
      "loss": 1.9197,
      "step": 6603
    },
    {
      "epoch": 0.16880729276003995,
      "grad_norm": 1.6876695156097412,
      "learning_rate": 4.626691348094777e-05,
      "loss": 1.9078,
      "step": 6634
    },
    {
      "epoch": 0.16959611188508686,
      "grad_norm": 1.7991163730621338,
      "learning_rate": 4.622264489304762e-05,
      "loss": 1.9147,
      "step": 6665
    },
    {
      "epoch": 0.17038493101013377,
      "grad_norm": 1.662376046180725,
      "learning_rate": 4.617813681048434e-05,
      "loss": 1.9263,
      "step": 6696
    },
    {
      "epoch": 0.17117375013518069,
      "grad_norm": 1.6112306118011475,
      "learning_rate": 4.61333897355256e-05,
      "loss": 1.9102,
      "step": 6727
    },
    {
      "epoch": 0.1719625692602276,
      "grad_norm": 1.757332444190979,
      "learning_rate": 4.608840417313604e-05,
      "loss": 1.921,
      "step": 6758
    },
    {
      "epoch": 0.17275138838527454,
      "grad_norm": 1.6604098081588745,
      "learning_rate": 4.6043180630971646e-05,
      "loss": 1.905,
      "step": 6789
    },
    {
      "epoch": 0.17354020751032145,
      "grad_norm": 1.6792216300964355,
      "learning_rate": 4.599771961937391e-05,
      "loss": 1.906,
      "step": 6820
    },
    {
      "epoch": 0.17432902663536837,
      "grad_norm": 1.635213851928711,
      "learning_rate": 4.5952021651364204e-05,
      "loss": 1.9038,
      "step": 6851
    },
    {
      "epoch": 0.17511784576041528,
      "grad_norm": 1.6034295558929443,
      "learning_rate": 4.590608724263786e-05,
      "loss": 1.9214,
      "step": 6882
    },
    {
      "epoch": 0.1759066648854622,
      "grad_norm": 1.730025053024292,
      "learning_rate": 4.585991691155845e-05,
      "loss": 1.9023,
      "step": 6913
    },
    {
      "epoch": 0.1766954840105091,
      "grad_norm": 1.6403334140777588,
      "learning_rate": 4.581351117915188e-05,
      "loss": 1.8944,
      "step": 6944
    },
    {
      "epoch": 0.17748430313555602,
      "grad_norm": 1.7030408382415771,
      "learning_rate": 4.5766870569100534e-05,
      "loss": 1.9082,
      "step": 6975
    },
    {
      "epoch": 0.17827312226060293,
      "grad_norm": 1.9924838542938232,
      "learning_rate": 4.571999560773736e-05,
      "loss": 1.9033,
      "step": 7006
    },
    {
      "epoch": 0.17906194138564985,
      "grad_norm": 1.688637137413025,
      "learning_rate": 4.5672886824039915e-05,
      "loss": 1.9087,
      "step": 7037
    },
    {
      "epoch": 0.17985076051069676,
      "grad_norm": 1.7744380235671997,
      "learning_rate": 4.5625544749624435e-05,
      "loss": 1.8911,
      "step": 7068
    },
    {
      "epoch": 0.18063957963574367,
      "grad_norm": 1.6221119165420532,
      "learning_rate": 4.5577969918739794e-05,
      "loss": 1.9018,
      "step": 7099
    },
    {
      "epoch": 0.1814283987607906,
      "grad_norm": 1.6945770978927612,
      "learning_rate": 4.5530162868261486e-05,
      "loss": 1.9042,
      "step": 7130
    },
    {
      "epoch": 0.18221721788583753,
      "grad_norm": 1.6812230348587036,
      "learning_rate": 4.548212413768558e-05,
      "loss": 1.8992,
      "step": 7161
    },
    {
      "epoch": 0.18300603701088444,
      "grad_norm": 1.5889744758605957,
      "learning_rate": 4.543385426912261e-05,
      "loss": 1.9132,
      "step": 7192
    },
    {
      "epoch": 0.18379485613593136,
      "grad_norm": 1.9001593589782715,
      "learning_rate": 4.53853538072915e-05,
      "loss": 1.9053,
      "step": 7223
    },
    {
      "epoch": 0.18458367526097827,
      "grad_norm": 1.647740125656128,
      "learning_rate": 4.533662329951336e-05,
      "loss": 1.9191,
      "step": 7254
    },
    {
      "epoch": 0.18537249438602518,
      "grad_norm": 1.6723840236663818,
      "learning_rate": 4.528766329570536e-05,
      "loss": 1.8782,
      "step": 7285
    },
    {
      "epoch": 0.1861613135110721,
      "grad_norm": 1.668317437171936,
      "learning_rate": 4.523847434837447e-05,
      "loss": 1.9026,
      "step": 7316
    },
    {
      "epoch": 0.186950132636119,
      "grad_norm": 1.640189528465271,
      "learning_rate": 4.518905701261128e-05,
      "loss": 1.9087,
      "step": 7347
    },
    {
      "epoch": 0.18773895176116592,
      "grad_norm": 1.5775387287139893,
      "learning_rate": 4.5139411846083715e-05,
      "loss": 1.9129,
      "step": 7378
    },
    {
      "epoch": 0.18852777088621284,
      "grad_norm": 1.5601975917816162,
      "learning_rate": 4.508953940903073e-05,
      "loss": 1.9088,
      "step": 7409
    },
    {
      "epoch": 0.18931659001125975,
      "grad_norm": 1.648223876953125,
      "learning_rate": 4.5039440264255994e-05,
      "loss": 1.8977,
      "step": 7440
    },
    {
      "epoch": 0.19010540913630666,
      "grad_norm": 1.6973642110824585,
      "learning_rate": 4.498911497712155e-05,
      "loss": 1.8849,
      "step": 7471
    },
    {
      "epoch": 0.19089422826135358,
      "grad_norm": 1.6180808544158936,
      "learning_rate": 4.493856411554142e-05,
      "loss": 1.8859,
      "step": 7502
    },
    {
      "epoch": 0.1916830473864005,
      "grad_norm": 1.6768759489059448,
      "learning_rate": 4.4887788249975206e-05,
      "loss": 1.9005,
      "step": 7533
    },
    {
      "epoch": 0.19247186651144743,
      "grad_norm": 1.6389315128326416,
      "learning_rate": 4.4836787953421656e-05,
      "loss": 1.9025,
      "step": 7564
    },
    {
      "epoch": 0.19326068563649434,
      "grad_norm": 1.6695667505264282,
      "learning_rate": 4.478556380141218e-05,
      "loss": 1.9015,
      "step": 7595
    },
    {
      "epoch": 0.19404950476154126,
      "grad_norm": 1.634464979171753,
      "learning_rate": 4.4734116372004375e-05,
      "loss": 1.8848,
      "step": 7626
    },
    {
      "epoch": 0.19483832388658817,
      "grad_norm": 1.6077677011489868,
      "learning_rate": 4.4682446245775477e-05,
      "loss": 1.8944,
      "step": 7657
    },
    {
      "epoch": 0.19562714301163509,
      "grad_norm": 1.7401186227798462,
      "learning_rate": 4.463055400581586e-05,
      "loss": 1.8841,
      "step": 7688
    },
    {
      "epoch": 0.196415962136682,
      "grad_norm": 1.588080644607544,
      "learning_rate": 4.4578440237722374e-05,
      "loss": 1.8988,
      "step": 7719
    },
    {
      "epoch": 0.1972047812617289,
      "grad_norm": 1.5799649953842163,
      "learning_rate": 4.452610552959183e-05,
      "loss": 1.8828,
      "step": 7750
    },
    {
      "epoch": 0.19799360038677583,
      "grad_norm": 1.6081531047821045,
      "learning_rate": 4.447355047201428e-05,
      "loss": 1.8967,
      "step": 7781
    },
    {
      "epoch": 0.19878241951182274,
      "grad_norm": 1.6133239269256592,
      "learning_rate": 4.4420775658066414e-05,
      "loss": 1.8898,
      "step": 7812
    },
    {
      "epoch": 0.19957123863686965,
      "grad_norm": 1.612506628036499,
      "learning_rate": 4.436778168330484e-05,
      "loss": 1.8962,
      "step": 7843
    },
    {
      "epoch": 0.20036005776191657,
      "grad_norm": 1.6066638231277466,
      "learning_rate": 4.4314569145759353e-05,
      "loss": 1.8885,
      "step": 7874
    },
    {
      "epoch": 0.20114887688696348,
      "grad_norm": 1.6667569875717163,
      "learning_rate": 4.42611386459262e-05,
      "loss": 1.8836,
      "step": 7905
    },
    {
      "epoch": 0.20193769601201042,
      "grad_norm": 1.7658061981201172,
      "learning_rate": 4.420749078676133e-05,
      "loss": 1.8627,
      "step": 7936
    },
    {
      "epoch": 0.20272651513705733,
      "grad_norm": 1.5527122020721436,
      "learning_rate": 4.4153626173673516e-05,
      "loss": 1.8682,
      "step": 7967
    },
    {
      "epoch": 0.20351533426210425,
      "grad_norm": 1.6022471189498901,
      "learning_rate": 4.409954541451762e-05,
      "loss": 1.8892,
      "step": 7998
    },
    {
      "epoch": 0.20430415338715116,
      "grad_norm": 1.6246200799942017,
      "learning_rate": 4.404524911958764e-05,
      "loss": 1.8703,
      "step": 8029
    },
    {
      "epoch": 0.20509297251219807,
      "grad_norm": 1.5030767917633057,
      "learning_rate": 4.399073790160989e-05,
      "loss": 1.8897,
      "step": 8060
    },
    {
      "epoch": 0.205881791637245,
      "grad_norm": 1.603633165359497,
      "learning_rate": 4.393601237573607e-05,
      "loss": 1.8655,
      "step": 8091
    },
    {
      "epoch": 0.2066706107622919,
      "grad_norm": 1.5709718465805054,
      "learning_rate": 4.388107315953628e-05,
      "loss": 1.8715,
      "step": 8122
    },
    {
      "epoch": 0.20745942988733881,
      "grad_norm": 1.529145359992981,
      "learning_rate": 4.382592087299212e-05,
      "loss": 1.8628,
      "step": 8153
    },
    {
      "epoch": 0.20824824901238573,
      "grad_norm": 1.6858514547348022,
      "learning_rate": 4.377055613848964e-05,
      "loss": 1.8871,
      "step": 8184
    },
    {
      "epoch": 0.20903706813743264,
      "grad_norm": 1.7125133275985718,
      "learning_rate": 4.3714979580812355e-05,
      "loss": 1.8768,
      "step": 8215
    },
    {
      "epoch": 0.20982588726247955,
      "grad_norm": 1.5397855043411255,
      "learning_rate": 4.365919182713416e-05,
      "loss": 1.8807,
      "step": 8246
    },
    {
      "epoch": 0.21061470638752647,
      "grad_norm": 1.6475356817245483,
      "learning_rate": 4.360319350701226e-05,
      "loss": 1.885,
      "step": 8277
    },
    {
      "epoch": 0.21140352551257338,
      "grad_norm": 1.5485234260559082,
      "learning_rate": 4.3546985252380115e-05,
      "loss": 1.8777,
      "step": 8308
    },
    {
      "epoch": 0.21219234463762032,
      "grad_norm": 1.5416793823242188,
      "learning_rate": 4.349056769754021e-05,
      "loss": 1.8577,
      "step": 8339
    },
    {
      "epoch": 0.21298116376266724,
      "grad_norm": 1.6852402687072754,
      "learning_rate": 4.3433941479156994e-05,
      "loss": 1.8807,
      "step": 8370
    },
    {
      "epoch": 0.21376998288771415,
      "grad_norm": 1.5214046239852905,
      "learning_rate": 4.3377107236249647e-05,
      "loss": 1.8631,
      "step": 8401
    },
    {
      "epoch": 0.21455880201276106,
      "grad_norm": 1.6051750183105469,
      "learning_rate": 4.332006561018488e-05,
      "loss": 1.8684,
      "step": 8432
    },
    {
      "epoch": 0.21534762113780798,
      "grad_norm": 1.5419507026672363,
      "learning_rate": 4.3262817244669683e-05,
      "loss": 1.9002,
      "step": 8463
    },
    {
      "epoch": 0.2161364402628549,
      "grad_norm": 1.6802319288253784,
      "learning_rate": 4.3205362785744083e-05,
      "loss": 1.8577,
      "step": 8494
    },
    {
      "epoch": 0.2169252593879018,
      "grad_norm": 1.5763416290283203,
      "learning_rate": 4.314770288177384e-05,
      "loss": 1.865,
      "step": 8525
    },
    {
      "epoch": 0.21771407851294872,
      "grad_norm": 1.5372315645217896,
      "learning_rate": 4.308983818344313e-05,
      "loss": 1.8634,
      "step": 8556
    },
    {
      "epoch": 0.21850289763799563,
      "grad_norm": 1.6838102340698242,
      "learning_rate": 4.3031769343747206e-05,
      "loss": 1.8685,
      "step": 8587
    },
    {
      "epoch": 0.21929171676304254,
      "grad_norm": 1.6022460460662842,
      "learning_rate": 4.297349701798505e-05,
      "loss": 1.8593,
      "step": 8618
    },
    {
      "epoch": 0.22008053588808946,
      "grad_norm": 1.6018834114074707,
      "learning_rate": 4.2915021863751916e-05,
      "loss": 1.8852,
      "step": 8649
    },
    {
      "epoch": 0.22086935501313637,
      "grad_norm": 1.5209150314331055,
      "learning_rate": 4.285634454093198e-05,
      "loss": 1.8522,
      "step": 8680
    },
    {
      "epoch": 0.2216581741381833,
      "grad_norm": 1.6187845468521118,
      "learning_rate": 4.279746571169086e-05,
      "loss": 1.8497,
      "step": 8711
    },
    {
      "epoch": 0.22244699326323022,
      "grad_norm": 1.6452182531356812,
      "learning_rate": 4.2738386040468136e-05,
      "loss": 1.8551,
      "step": 8742
    },
    {
      "epoch": 0.22323581238827714,
      "grad_norm": 1.613244652748108,
      "learning_rate": 4.2679106193969866e-05,
      "loss": 1.8629,
      "step": 8773
    },
    {
      "epoch": 0.22402463151332405,
      "grad_norm": 1.5586017370224,
      "learning_rate": 4.261962684116106e-05,
      "loss": 1.8674,
      "step": 8804
    },
    {
      "epoch": 0.22481345063837097,
      "grad_norm": 1.5552334785461426,
      "learning_rate": 4.2559948653258145e-05,
      "loss": 1.8923,
      "step": 8835
    },
    {
      "epoch": 0.22560226976341788,
      "grad_norm": 1.609717607498169,
      "learning_rate": 4.250007230372134e-05,
      "loss": 1.8854,
      "step": 8866
    },
    {
      "epoch": 0.2263910888884648,
      "grad_norm": 1.617981195449829,
      "learning_rate": 4.2439998468247126e-05,
      "loss": 1.8794,
      "step": 8897
    },
    {
      "epoch": 0.2271799080135117,
      "grad_norm": 1.5466769933700562,
      "learning_rate": 4.2379727824760566e-05,
      "loss": 1.8594,
      "step": 8928
    },
    {
      "epoch": 0.22796872713855862,
      "grad_norm": 1.5078591108322144,
      "learning_rate": 4.231926105340768e-05,
      "loss": 1.8543,
      "step": 8959
    },
    {
      "epoch": 0.22875754626360553,
      "grad_norm": 1.5065971612930298,
      "learning_rate": 4.225859883654776e-05,
      "loss": 1.8506,
      "step": 8990
    },
    {
      "epoch": 0.22954636538865245,
      "grad_norm": 1.583526849746704,
      "learning_rate": 4.219774185874569e-05,
      "loss": 1.8741,
      "step": 9021
    },
    {
      "epoch": 0.23033518451369936,
      "grad_norm": 1.5880491733551025,
      "learning_rate": 4.213669080676418e-05,
      "loss": 1.8531,
      "step": 9052
    },
    {
      "epoch": 0.2311240036387463,
      "grad_norm": 1.6649373769760132,
      "learning_rate": 4.2075446369556056e-05,
      "loss": 1.8524,
      "step": 9083
    },
    {
      "epoch": 0.2319128227637932,
      "grad_norm": 1.556809425354004,
      "learning_rate": 4.201400923825648e-05,
      "loss": 1.8581,
      "step": 9114
    },
    {
      "epoch": 0.23270164188884013,
      "grad_norm": 1.5572203397750854,
      "learning_rate": 4.195238010617511e-05,
      "loss": 1.8546,
      "step": 9145
    },
    {
      "epoch": 0.23349046101388704,
      "grad_norm": 1.5201469659805298,
      "learning_rate": 4.1890559668788344e-05,
      "loss": 1.8644,
      "step": 9176
    },
    {
      "epoch": 0.23427928013893395,
      "grad_norm": 1.5816295146942139,
      "learning_rate": 4.1828548623731405e-05,
      "loss": 1.8556,
      "step": 9207
    },
    {
      "epoch": 0.23506809926398087,
      "grad_norm": 1.5696643590927124,
      "learning_rate": 4.1766347670790506e-05,
      "loss": 1.8491,
      "step": 9238
    },
    {
      "epoch": 0.23585691838902778,
      "grad_norm": 1.5091686248779297,
      "learning_rate": 4.170395751189495e-05,
      "loss": 1.8493,
      "step": 9269
    },
    {
      "epoch": 0.2366457375140747,
      "grad_norm": 1.5627410411834717,
      "learning_rate": 4.164137885110921e-05,
      "loss": 1.8502,
      "step": 9300
    },
    {
      "epoch": 0.2374345566391216,
      "grad_norm": 1.5759685039520264,
      "learning_rate": 4.157861239462495e-05,
      "loss": 1.8427,
      "step": 9331
    },
    {
      "epoch": 0.23822337576416852,
      "grad_norm": 1.533496379852295,
      "learning_rate": 4.1515658850753114e-05,
      "loss": 1.8533,
      "step": 9362
    },
    {
      "epoch": 0.23901219488921543,
      "grad_norm": 1.5628371238708496,
      "learning_rate": 4.145251892991588e-05,
      "loss": 1.8439,
      "step": 9393
    },
    {
      "epoch": 0.23980101401426235,
      "grad_norm": 1.568591594696045,
      "learning_rate": 4.138919334463868e-05,
      "loss": 1.8533,
      "step": 9424
    },
    {
      "epoch": 0.24058983313930926,
      "grad_norm": 1.5815969705581665,
      "learning_rate": 4.1325682809542124e-05,
      "loss": 1.8511,
      "step": 9455
    },
    {
      "epoch": 0.2413786522643562,
      "grad_norm": 1.5089821815490723,
      "learning_rate": 4.126198804133398e-05,
      "loss": 1.8609,
      "step": 9486
    },
    {
      "epoch": 0.24216747138940312,
      "grad_norm": 1.5676932334899902,
      "learning_rate": 4.1198109758801055e-05,
      "loss": 1.8565,
      "step": 9517
    },
    {
      "epoch": 0.24295629051445003,
      "grad_norm": 1.5041563510894775,
      "learning_rate": 4.113404868280107e-05,
      "loss": 1.8536,
      "step": 9548
    },
    {
      "epoch": 0.24374510963949694,
      "grad_norm": 1.5710649490356445,
      "learning_rate": 4.106980553625457e-05,
      "loss": 1.8547,
      "step": 9579
    },
    {
      "epoch": 0.24453392876454386,
      "grad_norm": 1.5798616409301758,
      "learning_rate": 4.100538104413674e-05,
      "loss": 1.8572,
      "step": 9610
    },
    {
      "epoch": 0.24532274788959077,
      "grad_norm": 2.4222729206085205,
      "learning_rate": 4.09407759334692e-05,
      "loss": 1.8645,
      "step": 9641
    },
    {
      "epoch": 0.24611156701463768,
      "grad_norm": 1.4646201133728027,
      "learning_rate": 4.087599093331186e-05,
      "loss": 1.8496,
      "step": 9672
    },
    {
      "epoch": 0.2469003861396846,
      "grad_norm": 1.606465458869934,
      "learning_rate": 4.081102677475462e-05,
      "loss": 1.8459,
      "step": 9703
    },
    {
      "epoch": 0.2476892052647315,
      "grad_norm": 1.5241799354553223,
      "learning_rate": 4.0745884190909194e-05,
      "loss": 1.8511,
      "step": 9734
    },
    {
      "epoch": 0.24847802438977842,
      "grad_norm": 1.6257604360580444,
      "learning_rate": 4.0680563916900796e-05,
      "loss": 1.8416,
      "step": 9765
    },
    {
      "epoch": 0.24926684351482534,
      "grad_norm": 1.5233662128448486,
      "learning_rate": 4.0615066689859815e-05,
      "loss": 1.8465,
      "step": 9796
    },
    {
      "epoch": 0.2500556626398723,
      "grad_norm": 1.4877644777297974,
      "learning_rate": 4.0549393248913584e-05,
      "loss": 1.8467,
      "step": 9827
    },
    {
      "epoch": 0.2508444817649192,
      "grad_norm": 1.530216932296753,
      "learning_rate": 4.048354433517794e-05,
      "loss": 1.8304,
      "step": 9858
    },
    {
      "epoch": 0.2516333008899661,
      "grad_norm": 1.4812994003295898,
      "learning_rate": 4.0417520691748916e-05,
      "loss": 1.8324,
      "step": 9889
    },
    {
      "epoch": 0.252422120015013,
      "grad_norm": 1.5022152662277222,
      "learning_rate": 4.035132306369438e-05,
      "loss": 1.8521,
      "step": 9920
    },
    {
      "epoch": 0.25321093914005993,
      "grad_norm": 1.5792648792266846,
      "learning_rate": 4.028495219804555e-05,
      "loss": 1.8275,
      "step": 9951
    },
    {
      "epoch": 0.25399975826510685,
      "grad_norm": 1.4738425016403198,
      "learning_rate": 4.021840884378864e-05,
      "loss": 1.8484,
      "step": 9982
    },
    {
      "epoch": 0.25478857739015376,
      "grad_norm": 1.4558053016662598,
      "learning_rate": 4.015169375185633e-05,
      "loss": 1.842,
      "step": 10013
    },
    {
      "epoch": 0.25557739651520067,
      "grad_norm": 1.6721614599227905,
      "learning_rate": 4.0084807675119396e-05,
      "loss": 1.8582,
      "step": 10044
    },
    {
      "epoch": 0.2563662156402476,
      "grad_norm": 1.643314003944397,
      "learning_rate": 4.0017751368378106e-05,
      "loss": 1.8467,
      "step": 10075
    },
    {
      "epoch": 0.2571550347652945,
      "grad_norm": 1.600417137145996,
      "learning_rate": 3.995052558835377e-05,
      "loss": 1.8466,
      "step": 10106
    },
    {
      "epoch": 0.2579438538903414,
      "grad_norm": 1.5018486976623535,
      "learning_rate": 3.988313109368017e-05,
      "loss": 1.8299,
      "step": 10137
    },
    {
      "epoch": 0.2587326730153883,
      "grad_norm": 1.4656468629837036,
      "learning_rate": 3.981556864489504e-05,
      "loss": 1.8254,
      "step": 10168
    },
    {
      "epoch": 0.25952149214043524,
      "grad_norm": 1.573634386062622,
      "learning_rate": 3.974783900443142e-05,
      "loss": 1.8387,
      "step": 10199
    },
    {
      "epoch": 0.26031031126548215,
      "grad_norm": 1.4818580150604248,
      "learning_rate": 3.9679942936609095e-05,
      "loss": 1.8364,
      "step": 10230
    },
    {
      "epoch": 0.26109913039052907,
      "grad_norm": 1.560797095298767,
      "learning_rate": 3.961188120762596e-05,
      "loss": 1.8504,
      "step": 10261
    },
    {
      "epoch": 0.261887949515576,
      "grad_norm": 1.535651683807373,
      "learning_rate": 3.954365458554938e-05,
      "loss": 1.8408,
      "step": 10292
    },
    {
      "epoch": 0.2626767686406229,
      "grad_norm": 1.5009502172470093,
      "learning_rate": 3.947526384030751e-05,
      "loss": 1.8612,
      "step": 10323
    },
    {
      "epoch": 0.2634655877656698,
      "grad_norm": 1.5589120388031006,
      "learning_rate": 3.9406709743680624e-05,
      "loss": 1.843,
      "step": 10354
    },
    {
      "epoch": 0.2642544068907167,
      "grad_norm": 1.6979308128356934,
      "learning_rate": 3.9337993069292366e-05,
      "loss": 1.8445,
      "step": 10385
    },
    {
      "epoch": 0.26504322601576363,
      "grad_norm": 1.4456043243408203,
      "learning_rate": 3.926911459260109e-05,
      "loss": 1.8455,
      "step": 10416
    },
    {
      "epoch": 0.2658320451408106,
      "grad_norm": 1.5096760988235474,
      "learning_rate": 3.920007509089102e-05,
      "loss": 1.8243,
      "step": 10447
    },
    {
      "epoch": 0.2666208642658575,
      "grad_norm": 1.534406065940857,
      "learning_rate": 3.913087534326357e-05,
      "loss": 1.828,
      "step": 10478
    },
    {
      "epoch": 0.26740968339090443,
      "grad_norm": 1.7499176263809204,
      "learning_rate": 3.9061516130628475e-05,
      "loss": 1.8372,
      "step": 10509
    },
    {
      "epoch": 0.26819850251595134,
      "grad_norm": 1.5080372095108032,
      "learning_rate": 3.8991998235695025e-05,
      "loss": 1.8237,
      "step": 10540
    },
    {
      "epoch": 0.26898732164099826,
      "grad_norm": 1.495322346687317,
      "learning_rate": 3.8922322442963224e-05,
      "loss": 1.856,
      "step": 10571
    },
    {
      "epoch": 0.26977614076604517,
      "grad_norm": 1.548775315284729,
      "learning_rate": 3.885248953871491e-05,
      "loss": 1.8305,
      "step": 10602
    },
    {
      "epoch": 0.2705649598910921,
      "grad_norm": 1.655852198600769,
      "learning_rate": 3.8782500311004915e-05,
      "loss": 1.826,
      "step": 10633
    },
    {
      "epoch": 0.271353779016139,
      "grad_norm": 1.4688091278076172,
      "learning_rate": 3.871235554965218e-05,
      "loss": 1.8277,
      "step": 10664
    },
    {
      "epoch": 0.2721425981411859,
      "grad_norm": 1.4753823280334473,
      "learning_rate": 3.864205604623078e-05,
      "loss": 1.8343,
      "step": 10695
    },
    {
      "epoch": 0.2729314172662328,
      "grad_norm": 1.5295604467391968,
      "learning_rate": 3.857160259406107e-05,
      "loss": 1.8212,
      "step": 10726
    },
    {
      "epoch": 0.27372023639127974,
      "grad_norm": 1.5936615467071533,
      "learning_rate": 3.8500995988200674e-05,
      "loss": 1.8369,
      "step": 10757
    },
    {
      "epoch": 0.27450905551632665,
      "grad_norm": 1.61776864528656,
      "learning_rate": 3.843023702543556e-05,
      "loss": 1.8075,
      "step": 10788
    },
    {
      "epoch": 0.27529787464137356,
      "grad_norm": 1.5151159763336182,
      "learning_rate": 3.8359326504270984e-05,
      "loss": 1.824,
      "step": 10819
    },
    {
      "epoch": 0.2760866937664205,
      "grad_norm": 1.6150144338607788,
      "learning_rate": 3.828826522492255e-05,
      "loss": 1.8392,
      "step": 10850
    },
    {
      "epoch": 0.2768755128914674,
      "grad_norm": 1.4708991050720215,
      "learning_rate": 3.821705398930713e-05,
      "loss": 1.8428,
      "step": 10881
    },
    {
      "epoch": 0.2776643320165143,
      "grad_norm": 1.4772177934646606,
      "learning_rate": 3.814569360103385e-05,
      "loss": 1.816,
      "step": 10912
    },
    {
      "epoch": 0.2784531511415612,
      "grad_norm": 1.5404503345489502,
      "learning_rate": 3.807418486539499e-05,
      "loss": 1.8148,
      "step": 10943
    },
    {
      "epoch": 0.27924197026660813,
      "grad_norm": 1.5612235069274902,
      "learning_rate": 3.80025285893569e-05,
      "loss": 1.8345,
      "step": 10974
    },
    {
      "epoch": 0.28003078939165504,
      "grad_norm": 1.5301347970962524,
      "learning_rate": 3.793072558155093e-05,
      "loss": 1.8248,
      "step": 11005
    },
    {
      "epoch": 0.28081960851670196,
      "grad_norm": 1.4737629890441895,
      "learning_rate": 3.785877665226426e-05,
      "loss": 1.8198,
      "step": 11036
    },
    {
      "epoch": 0.28160842764174887,
      "grad_norm": 1.4503047466278076,
      "learning_rate": 3.778668261343079e-05,
      "loss": 1.8371,
      "step": 11067
    },
    {
      "epoch": 0.2823972467667958,
      "grad_norm": 1.5823520421981812,
      "learning_rate": 3.771444427862192e-05,
      "loss": 1.8325,
      "step": 11098
    },
    {
      "epoch": 0.2831860658918427,
      "grad_norm": 1.6144663095474243,
      "learning_rate": 3.7642062463037465e-05,
      "loss": 1.8209,
      "step": 11129
    },
    {
      "epoch": 0.2839748850168896,
      "grad_norm": 1.6444216966629028,
      "learning_rate": 3.7569537983496373e-05,
      "loss": 1.825,
      "step": 11160
    },
    {
      "epoch": 0.2847637041419365,
      "grad_norm": 1.503098487854004,
      "learning_rate": 3.749687165842753e-05,
      "loss": 1.8392,
      "step": 11191
    },
    {
      "epoch": 0.2855525232669835,
      "grad_norm": 1.83393394947052,
      "learning_rate": 3.7424064307860536e-05,
      "loss": 1.8252,
      "step": 11222
    },
    {
      "epoch": 0.2863413423920304,
      "grad_norm": 1.4748836755752563,
      "learning_rate": 3.735111675341645e-05,
      "loss": 1.806,
      "step": 11253
    },
    {
      "epoch": 0.2871301615170773,
      "grad_norm": 1.912840485572815,
      "learning_rate": 3.7278029818298524e-05,
      "loss": 1.8095,
      "step": 11284
    },
    {
      "epoch": 0.28791898064212423,
      "grad_norm": 1.5209791660308838,
      "learning_rate": 3.720480432728287e-05,
      "loss": 1.8162,
      "step": 11315
    },
    {
      "epoch": 0.28870779976717115,
|
"grad_norm": 1.5430744886398315, |
|
"learning_rate": 3.71314411067092e-05, |
|
"loss": 1.833, |
|
"step": 11346 |
|
}, |
|
{ |
|
"epoch": 0.28949661889221806, |
|
"grad_norm": 1.592753529548645, |
|
"learning_rate": 3.70579409844715e-05, |
|
"loss": 1.8402, |
|
"step": 11377 |
|
}, |
|
{ |
|
"epoch": 0.290285438017265, |
|
"grad_norm": 1.5640573501586914, |
|
"learning_rate": 3.698430479000865e-05, |
|
"loss": 1.8275, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 0.2910742571423119, |
|
"grad_norm": 1.4649739265441895, |
|
"learning_rate": 3.691053335429509e-05, |
|
"loss": 1.8092, |
|
"step": 11439 |
|
}, |
|
{ |
|
"epoch": 0.2918630762673588, |
|
"grad_norm": 1.5399479866027832, |
|
"learning_rate": 3.683662750983147e-05, |
|
"loss": 1.8131, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 0.2926518953924057, |
|
"grad_norm": 1.5420011281967163, |
|
"learning_rate": 3.676258809063518e-05, |
|
"loss": 1.8287, |
|
"step": 11501 |
|
}, |
|
{ |
|
"epoch": 0.2934407145174526, |
|
"grad_norm": 1.4778993129730225, |
|
"learning_rate": 3.6688415932231004e-05, |
|
"loss": 1.8202, |
|
"step": 11532 |
|
}, |
|
{ |
|
"epoch": 0.29422953364249954, |
|
"grad_norm": 1.5598480701446533, |
|
"learning_rate": 3.661411187164166e-05, |
|
"loss": 1.8425, |
|
"step": 11563 |
|
}, |
|
{ |
|
"epoch": 0.29501835276754645, |
|
"grad_norm": 1.6161003112792969, |
|
"learning_rate": 3.65396767473784e-05, |
|
"loss": 1.8402, |
|
"step": 11594 |
|
}, |
|
{ |
|
"epoch": 0.29580717189259337, |
|
"grad_norm": 1.4399648904800415, |
|
"learning_rate": 3.6465111399431465e-05, |
|
"loss": 1.8083, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 0.2965959910176403, |
|
"grad_norm": 1.5157605409622192, |
|
"learning_rate": 3.6390416669260674e-05, |
|
"loss": 1.8041, |
|
"step": 11656 |
|
}, |
|
{ |
|
"epoch": 0.2973848101426872, |
|
"grad_norm": 1.5234498977661133, |
|
"learning_rate": 3.63155933997859e-05, |
|
"loss": 1.8104, |
|
"step": 11687 |
|
}, |
|
{ |
|
"epoch": 0.2981736292677341, |
|
"grad_norm": 1.4988269805908203, |
|
"learning_rate": 3.624064243537758e-05, |
|
"loss": 1.8192, |
|
"step": 11718 |
|
}, |
|
{ |
|
"epoch": 0.298962448392781, |
|
"grad_norm": 1.5283297300338745, |
|
"learning_rate": 3.616556462184716e-05, |
|
"loss": 1.8123, |
|
"step": 11749 |
|
}, |
|
{ |
|
"epoch": 0.29975126751782794, |
|
"grad_norm": 1.523959994316101, |
|
"learning_rate": 3.609036080643755e-05, |
|
"loss": 1.8391, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.30054008664287485, |
|
"grad_norm": 1.4363136291503906, |
|
"learning_rate": 3.60150318378136e-05, |
|
"loss": 1.8176, |
|
"step": 11811 |
|
}, |
|
{ |
|
"epoch": 0.30132890576792176, |
|
"grad_norm": 1.521101474761963, |
|
"learning_rate": 3.5939578566052465e-05, |
|
"loss": 1.8164, |
|
"step": 11842 |
|
}, |
|
{ |
|
"epoch": 0.3021177248929687, |
|
"grad_norm": 1.4613672494888306, |
|
"learning_rate": 3.586400184263408e-05, |
|
"loss": 1.8204, |
|
"step": 11873 |
|
}, |
|
{ |
|
"epoch": 0.3029065440180156, |
|
"grad_norm": 1.4992350339889526, |
|
"learning_rate": 3.578830252043148e-05, |
|
"loss": 1.8122, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.3036953631430625, |
|
"grad_norm": 1.659454584121704, |
|
"learning_rate": 3.571248145370125e-05, |
|
"loss": 1.8243, |
|
"step": 11935 |
|
}, |
|
{ |
|
"epoch": 0.3044841822681094, |
|
"grad_norm": 1.429630160331726, |
|
"learning_rate": 3.5636539498073794e-05, |
|
"loss": 1.8079, |
|
"step": 11966 |
|
}, |
|
{ |
|
"epoch": 0.3052730013931564, |
|
"grad_norm": 1.4403787851333618, |
|
"learning_rate": 3.556047751054378e-05, |
|
"loss": 1.8079, |
|
"step": 11997 |
|
}, |
|
{ |
|
"epoch": 0.3060618205182033, |
|
"grad_norm": 1.4795056581497192, |
|
"learning_rate": 3.548429634946039e-05, |
|
"loss": 1.7949, |
|
"step": 12028 |
|
}, |
|
{ |
|
"epoch": 0.3068506396432502, |
|
"grad_norm": 1.5466026067733765, |
|
"learning_rate": 3.540799687451768e-05, |
|
"loss": 1.8039, |
|
"step": 12059 |
|
}, |
|
{ |
|
"epoch": 0.3076394587682971, |
|
"grad_norm": 1.5872678756713867, |
|
"learning_rate": 3.533157994674485e-05, |
|
"loss": 1.8174, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 0.30842827789334404, |
|
"grad_norm": 1.4539406299591064, |
|
"learning_rate": 3.5255046428496546e-05, |
|
"loss": 1.8212, |
|
"step": 12121 |
|
}, |
|
{ |
|
"epoch": 0.30921709701839095, |
|
"grad_norm": 1.443831443786621, |
|
"learning_rate": 3.517839718344311e-05, |
|
"loss": 1.8387, |
|
"step": 12152 |
|
}, |
|
{ |
|
"epoch": 0.31000591614343787, |
|
"grad_norm": 1.4756397008895874, |
|
"learning_rate": 3.510163307656086e-05, |
|
"loss": 1.827, |
|
"step": 12183 |
|
}, |
|
{ |
|
"epoch": 0.3107947352684848, |
|
"grad_norm": 1.5675030946731567, |
|
"learning_rate": 3.5024754974122324e-05, |
|
"loss": 1.8198, |
|
"step": 12214 |
|
}, |
|
{ |
|
"epoch": 0.3115835543935317, |
|
"grad_norm": 1.4247853755950928, |
|
"learning_rate": 3.494776374368643e-05, |
|
"loss": 1.8072, |
|
"step": 12245 |
|
}, |
|
{ |
|
"epoch": 0.3123723735185786, |
|
"grad_norm": 1.567158579826355, |
|
"learning_rate": 3.4870660254088724e-05, |
|
"loss": 1.8158, |
|
"step": 12276 |
|
}, |
|
{ |
|
"epoch": 0.3131611926436255, |
|
"grad_norm": 1.4549590349197388, |
|
"learning_rate": 3.479344537543164e-05, |
|
"loss": 1.8044, |
|
"step": 12307 |
|
}, |
|
{ |
|
"epoch": 0.31395001176867243, |
|
"grad_norm": 1.4478166103363037, |
|
"learning_rate": 3.4716119979074565e-05, |
|
"loss": 1.8099, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 0.31473883089371935, |
|
"grad_norm": 1.4160298109054565, |
|
"learning_rate": 3.463868493762412e-05, |
|
"loss": 1.8179, |
|
"step": 12369 |
|
}, |
|
{ |
|
"epoch": 0.31552765001876626, |
|
"grad_norm": 1.5618009567260742, |
|
"learning_rate": 3.456114112492418e-05, |
|
"loss": 1.801, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3163164691438132, |
|
"grad_norm": 1.3777692317962646, |
|
"learning_rate": 3.4483489416046164e-05, |
|
"loss": 1.8117, |
|
"step": 12431 |
|
}, |
|
{ |
|
"epoch": 0.3171052882688601, |
|
"grad_norm": 1.5304317474365234, |
|
"learning_rate": 3.440573068727905e-05, |
|
"loss": 1.8085, |
|
"step": 12462 |
|
}, |
|
{ |
|
"epoch": 0.317894107393907, |
|
"grad_norm": 1.5578155517578125, |
|
"learning_rate": 3.4327865816119495e-05, |
|
"loss": 1.8106, |
|
"step": 12493 |
|
}, |
|
{ |
|
"epoch": 0.3186829265189539, |
|
"grad_norm": 1.51682710647583, |
|
"learning_rate": 3.4249895681262025e-05, |
|
"loss": 1.8199, |
|
"step": 12524 |
|
}, |
|
{ |
|
"epoch": 0.3194717456440008, |
|
"grad_norm": 1.4429659843444824, |
|
"learning_rate": 3.417182116258899e-05, |
|
"loss": 1.7975, |
|
"step": 12555 |
|
}, |
|
{ |
|
"epoch": 0.32026056476904774, |
|
"grad_norm": 1.4837095737457275, |
|
"learning_rate": 3.409364314116074e-05, |
|
"loss": 1.8039, |
|
"step": 12586 |
|
}, |
|
{ |
|
"epoch": 0.32104938389409465, |
|
"grad_norm": 1.4250850677490234, |
|
"learning_rate": 3.401536249920559e-05, |
|
"loss": 1.7886, |
|
"step": 12617 |
|
}, |
|
{ |
|
"epoch": 0.32183820301914157, |
|
"grad_norm": 1.429291009902954, |
|
"learning_rate": 3.393698012010998e-05, |
|
"loss": 1.7938, |
|
"step": 12648 |
|
}, |
|
{ |
|
"epoch": 0.3226270221441885, |
|
"grad_norm": 1.4839123487472534, |
|
"learning_rate": 3.385849688840839e-05, |
|
"loss": 1.811, |
|
"step": 12679 |
|
}, |
|
{ |
|
"epoch": 0.3234158412692354, |
|
"grad_norm": 1.5528687238693237, |
|
"learning_rate": 3.3779913689773414e-05, |
|
"loss": 1.8004, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 0.3242046603942823, |
|
"grad_norm": 1.4324339628219604, |
|
"learning_rate": 3.370123141100578e-05, |
|
"loss": 1.8048, |
|
"step": 12741 |
|
}, |
|
{ |
|
"epoch": 0.3249934795193293, |
|
"grad_norm": 1.5204551219940186, |
|
"learning_rate": 3.3622450940024305e-05, |
|
"loss": 1.7944, |
|
"step": 12772 |
|
}, |
|
{ |
|
"epoch": 0.3257822986443762, |
|
"grad_norm": 1.3667051792144775, |
|
"learning_rate": 3.35435731658559e-05, |
|
"loss": 1.8086, |
|
"step": 12803 |
|
}, |
|
{ |
|
"epoch": 0.3265711177694231, |
|
"grad_norm": 1.4720345735549927, |
|
"learning_rate": 3.346459897862552e-05, |
|
"loss": 1.7982, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 0.32735993689447, |
|
"grad_norm": 1.4867823123931885, |
|
"learning_rate": 3.338552926954613e-05, |
|
"loss": 1.7855, |
|
"step": 12865 |
|
}, |
|
{ |
|
"epoch": 0.32814875601951693, |
|
"grad_norm": 1.468201994895935, |
|
"learning_rate": 3.330636493090868e-05, |
|
"loss": 1.7909, |
|
"step": 12896 |
|
}, |
|
{ |
|
"epoch": 0.32893757514456384, |
|
"grad_norm": 1.4950802326202393, |
|
"learning_rate": 3.322710685607193e-05, |
|
"loss": 1.78, |
|
"step": 12927 |
|
}, |
|
{ |
|
"epoch": 0.32972639426961076, |
|
"grad_norm": 1.6039625406265259, |
|
"learning_rate": 3.314775593945251e-05, |
|
"loss": 1.8094, |
|
"step": 12958 |
|
}, |
|
{ |
|
"epoch": 0.33051521339465767, |
|
"grad_norm": 1.5084600448608398, |
|
"learning_rate": 3.3068313076514714e-05, |
|
"loss": 1.7908, |
|
"step": 12989 |
|
}, |
|
{ |
|
"epoch": 0.3313040325197046, |
|
"grad_norm": 1.4338363409042358, |
|
"learning_rate": 3.298877916376047e-05, |
|
"loss": 1.8092, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.3320928516447515, |
|
"grad_norm": 1.4125412702560425, |
|
"learning_rate": 3.290915509871915e-05, |
|
"loss": 1.7756, |
|
"step": 13051 |
|
}, |
|
{ |
|
"epoch": 0.3328816707697984, |
|
"grad_norm": 1.4515947103500366, |
|
"learning_rate": 3.282944177993753e-05, |
|
"loss": 1.7845, |
|
"step": 13082 |
|
}, |
|
{ |
|
"epoch": 0.3336704898948453, |
|
"grad_norm": 1.586795449256897, |
|
"learning_rate": 3.274964010696957e-05, |
|
"loss": 1.8047, |
|
"step": 13113 |
|
}, |
|
{ |
|
"epoch": 0.33445930901989224, |
|
"grad_norm": 1.486333966255188, |
|
"learning_rate": 3.266975098036629e-05, |
|
"loss": 1.7913, |
|
"step": 13144 |
|
}, |
|
{ |
|
"epoch": 0.33524812814493915, |
|
"grad_norm": 1.4172276258468628, |
|
"learning_rate": 3.258977530166562e-05, |
|
"loss": 1.7962, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 0.33603694726998606, |
|
"grad_norm": 1.4856761693954468, |
|
"learning_rate": 3.250971397338227e-05, |
|
"loss": 1.8232, |
|
"step": 13206 |
|
}, |
|
{ |
|
"epoch": 0.336825766395033, |
|
"grad_norm": 1.496001124382019, |
|
"learning_rate": 3.2429567898997404e-05, |
|
"loss": 1.7968, |
|
"step": 13237 |
|
}, |
|
{ |
|
"epoch": 0.3376145855200799, |
|
"grad_norm": 1.5100946426391602, |
|
"learning_rate": 3.234933798294859e-05, |
|
"loss": 1.806, |
|
"step": 13268 |
|
}, |
|
{ |
|
"epoch": 0.3384034046451268, |
|
"grad_norm": 1.4353514909744263, |
|
"learning_rate": 3.2269025130619535e-05, |
|
"loss": 1.8088, |
|
"step": 13299 |
|
}, |
|
{ |
|
"epoch": 0.3391922237701737, |
|
"grad_norm": 1.393201231956482, |
|
"learning_rate": 3.218863024832985e-05, |
|
"loss": 1.778, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 0.33998104289522063, |
|
"grad_norm": 1.4197418689727783, |
|
"learning_rate": 3.2108154243324864e-05, |
|
"loss": 1.7986, |
|
"step": 13361 |
|
}, |
|
{ |
|
"epoch": 0.34076986202026754, |
|
"grad_norm": 1.5117108821868896, |
|
"learning_rate": 3.2027598023765345e-05, |
|
"loss": 1.812, |
|
"step": 13392 |
|
}, |
|
{ |
|
"epoch": 0.34155868114531446, |
|
"grad_norm": 1.4815988540649414, |
|
"learning_rate": 3.194696249871729e-05, |
|
"loss": 1.7971, |
|
"step": 13423 |
|
}, |
|
{ |
|
"epoch": 0.34234750027036137, |
|
"grad_norm": 1.4436742067337036, |
|
"learning_rate": 3.186624857814164e-05, |
|
"loss": 1.7946, |
|
"step": 13454 |
|
}, |
|
{ |
|
"epoch": 0.3431363193954083, |
|
"grad_norm": 1.6074799299240112, |
|
"learning_rate": 3.178545717288401e-05, |
|
"loss": 1.8018, |
|
"step": 13485 |
|
}, |
|
{ |
|
"epoch": 0.3439251385204552, |
|
"grad_norm": 1.5564550161361694, |
|
"learning_rate": 3.170458919466444e-05, |
|
"loss": 1.7816, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 0.34471395764550217, |
|
"grad_norm": 1.4918630123138428, |
|
"learning_rate": 3.1623645556067063e-05, |
|
"loss": 1.7759, |
|
"step": 13547 |
|
}, |
|
{ |
|
"epoch": 0.3455027767705491, |
|
"grad_norm": 1.537247896194458, |
|
"learning_rate": 3.154262717052985e-05, |
|
"loss": 1.7959, |
|
"step": 13578 |
|
}, |
|
{ |
|
"epoch": 0.346291595895596, |
|
"grad_norm": 1.4561980962753296, |
|
"learning_rate": 3.146153495233426e-05, |
|
"loss": 1.7923, |
|
"step": 13609 |
|
}, |
|
{ |
|
"epoch": 0.3470804150206429, |
|
"grad_norm": 1.4964359998703003, |
|
"learning_rate": 3.1380369816594944e-05, |
|
"loss": 1.8033, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.3478692341456898, |
|
"grad_norm": 1.463797688484192, |
|
"learning_rate": 3.129913267924946e-05, |
|
"loss": 1.7794, |
|
"step": 13671 |
|
}, |
|
{ |
|
"epoch": 0.34865805327073673, |
|
"grad_norm": 1.4784711599349976, |
|
"learning_rate": 3.121782445704782e-05, |
|
"loss": 1.8002, |
|
"step": 13702 |
|
}, |
|
{ |
|
"epoch": 0.34944687239578365, |
|
"grad_norm": 1.4917761087417603, |
|
"learning_rate": 3.11364460675423e-05, |
|
"loss": 1.7964, |
|
"step": 13733 |
|
}, |
|
{ |
|
"epoch": 0.35023569152083056, |
|
"grad_norm": 1.4284688234329224, |
|
"learning_rate": 3.1054998429076934e-05, |
|
"loss": 1.7981, |
|
"step": 13764 |
|
}, |
|
{ |
|
"epoch": 0.3510245106458775, |
|
"grad_norm": 1.4910475015640259, |
|
"learning_rate": 3.097348246077728e-05, |
|
"loss": 1.7952, |
|
"step": 13795 |
|
}, |
|
{ |
|
"epoch": 0.3518133297709244, |
|
"grad_norm": 1.4870178699493408, |
|
"learning_rate": 3.0891899082539924e-05, |
|
"loss": 1.7876, |
|
"step": 13826 |
|
}, |
|
{ |
|
"epoch": 0.3526021488959713, |
|
"grad_norm": 1.5134365558624268, |
|
"learning_rate": 3.0810249215022233e-05, |
|
"loss": 1.7961, |
|
"step": 13857 |
|
}, |
|
{ |
|
"epoch": 0.3533909680210182, |
|
"grad_norm": 1.595760464668274, |
|
"learning_rate": 3.0728533779631865e-05, |
|
"loss": 1.8069, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.35417978714606513, |
|
"grad_norm": 1.5597907304763794, |
|
"learning_rate": 3.064675369851637e-05, |
|
"loss": 1.787, |
|
"step": 13919 |
|
}, |
|
{ |
|
"epoch": 0.35496860627111204, |
|
"grad_norm": 1.4652432203292847, |
|
"learning_rate": 3.056490989455289e-05, |
|
"loss": 1.7936, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.35575742539615895, |
|
"grad_norm": 1.5195232629776, |
|
"learning_rate": 3.0483003291337596e-05, |
|
"loss": 1.7988, |
|
"step": 13981 |
|
}, |
|
{ |
|
"epoch": 0.35654624452120587, |
|
"grad_norm": 1.5883373022079468, |
|
"learning_rate": 3.040103481317539e-05, |
|
"loss": 1.7752, |
|
"step": 14012 |
|
}, |
|
{ |
|
"epoch": 0.3573350636462528, |
|
"grad_norm": 1.4016722440719604, |
|
"learning_rate": 3.03190053850694e-05, |
|
"loss": 1.7793, |
|
"step": 14043 |
|
}, |
|
{ |
|
"epoch": 0.3581238827712997, |
|
"grad_norm": 1.4025518894195557, |
|
"learning_rate": 3.0236915932710573e-05, |
|
"loss": 1.7788, |
|
"step": 14074 |
|
}, |
|
{ |
|
"epoch": 0.3589127018963466, |
|
"grad_norm": 1.3901499509811401, |
|
"learning_rate": 3.0154767382467232e-05, |
|
"loss": 1.7766, |
|
"step": 14105 |
|
}, |
|
{ |
|
"epoch": 0.3597015210213935, |
|
"grad_norm": 1.4077001810073853, |
|
"learning_rate": 3.0072560661374582e-05, |
|
"loss": 1.7903, |
|
"step": 14136 |
|
}, |
|
{ |
|
"epoch": 0.36049034014644044, |
|
"grad_norm": 1.4476062059402466, |
|
"learning_rate": 2.999029669712431e-05, |
|
"loss": 1.7851, |
|
"step": 14167 |
|
}, |
|
{ |
|
"epoch": 0.36127915927148735, |
|
"grad_norm": 1.4461426734924316, |
|
"learning_rate": 2.990797641805408e-05, |
|
"loss": 1.7759, |
|
"step": 14198 |
|
}, |
|
{ |
|
"epoch": 0.36206797839653426, |
|
"grad_norm": 1.452197790145874, |
|
"learning_rate": 2.982560075313704e-05, |
|
"loss": 1.7877, |
|
"step": 14229 |
|
}, |
|
{ |
|
"epoch": 0.3628567975215812, |
|
"grad_norm": 1.4651702642440796, |
|
"learning_rate": 2.9743170631971368e-05, |
|
"loss": 1.7609, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.3636456166466281, |
|
"grad_norm": 1.4937199354171753, |
|
"learning_rate": 2.9660686984769792e-05, |
|
"loss": 1.7794, |
|
"step": 14291 |
|
}, |
|
{ |
|
"epoch": 0.36443443577167506, |
|
"grad_norm": 1.4670535326004028, |
|
"learning_rate": 2.9578150742349047e-05, |
|
"loss": 1.7891, |
|
"step": 14322 |
|
}, |
|
{ |
|
"epoch": 0.36522325489672197, |
|
"grad_norm": 1.5605027675628662, |
|
"learning_rate": 2.949556283611942e-05, |
|
"loss": 1.7941, |
|
"step": 14353 |
|
}, |
|
{ |
|
"epoch": 0.3660120740217689, |
|
"grad_norm": 1.4724050760269165, |
|
"learning_rate": 2.9412924198074206e-05, |
|
"loss": 1.7944, |
|
"step": 14384 |
|
}, |
|
{ |
|
"epoch": 0.3668008931468158, |
|
"grad_norm": 1.4741649627685547, |
|
"learning_rate": 2.9330235760779208e-05, |
|
"loss": 1.7881, |
|
"step": 14415 |
|
}, |
|
{ |
|
"epoch": 0.3675897122718627, |
|
"grad_norm": 1.9677305221557617, |
|
"learning_rate": 2.9247498457362188e-05, |
|
"loss": 1.7911, |
|
"step": 14446 |
|
}, |
|
{ |
|
"epoch": 0.3683785313969096, |
|
"grad_norm": 1.5587711334228516, |
|
"learning_rate": 2.9164713221502373e-05, |
|
"loss": 1.7955, |
|
"step": 14477 |
|
}, |
|
{ |
|
"epoch": 0.36916735052195654, |
|
"grad_norm": 1.4210267066955566, |
|
"learning_rate": 2.9081880987419912e-05, |
|
"loss": 1.7656, |
|
"step": 14508 |
|
}, |
|
{ |
|
"epoch": 0.36995616964700345, |
|
"grad_norm": 1.5188663005828857, |
|
"learning_rate": 2.8999002689865296e-05, |
|
"loss": 1.7779, |
|
"step": 14539 |
|
}, |
|
{ |
|
"epoch": 0.37074498877205037, |
|
"grad_norm": 1.460797905921936, |
|
"learning_rate": 2.8916079264108852e-05, |
|
"loss": 1.7929, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 0.3715338078970973, |
|
"grad_norm": 1.464895486831665, |
|
"learning_rate": 2.883311164593017e-05, |
|
"loss": 1.777, |
|
"step": 14601 |
|
}, |
|
{ |
|
"epoch": 0.3723226270221442, |
|
"grad_norm": 1.5290796756744385, |
|
"learning_rate": 2.875010077160754e-05, |
|
"loss": 1.7837, |
|
"step": 14632 |
|
}, |
|
{ |
|
"epoch": 0.3731114461471911, |
|
"grad_norm": 1.4018083810806274, |
|
"learning_rate": 2.866704757790741e-05, |
|
"loss": 1.7685, |
|
"step": 14663 |
|
}, |
|
{ |
|
"epoch": 0.373900265272238, |
|
"grad_norm": 1.3743735551834106, |
|
"learning_rate": 2.858395300207376e-05, |
|
"loss": 1.7752, |
|
"step": 14694 |
|
}, |
|
{ |
|
"epoch": 0.37468908439728493, |
|
"grad_norm": 1.4071861505508423, |
|
"learning_rate": 2.8500817981817607e-05, |
|
"loss": 1.7894, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 0.37547790352233185, |
|
"grad_norm": 1.533968448638916, |
|
"learning_rate": 2.8417643455306336e-05, |
|
"loss": 1.7815, |
|
"step": 14756 |
|
}, |
|
{ |
|
"epoch": 0.37626672264737876, |
|
"grad_norm": 1.4698586463928223, |
|
"learning_rate": 2.8334430361153185e-05, |
|
"loss": 1.7693, |
|
"step": 14787 |
|
}, |
|
{ |
|
"epoch": 0.3770555417724257, |
|
"grad_norm": 1.4075795412063599, |
|
"learning_rate": 2.8251179638406612e-05, |
|
"loss": 1.7701, |
|
"step": 14818 |
|
}, |
|
{ |
|
"epoch": 0.3778443608974726, |
|
"grad_norm": 1.4033679962158203, |
|
"learning_rate": 2.8167892226539704e-05, |
|
"loss": 1.7728, |
|
"step": 14849 |
|
}, |
|
{ |
|
"epoch": 0.3786331800225195, |
|
"grad_norm": 1.4572257995605469, |
|
"learning_rate": 2.8084569065439588e-05, |
|
"loss": 1.8019, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.3794219991475664, |
|
"grad_norm": 1.5333317518234253, |
|
"learning_rate": 2.8001211095396807e-05, |
|
"loss": 1.7979, |
|
"step": 14911 |
|
}, |
|
{ |
|
"epoch": 0.3802108182726133, |
|
"grad_norm": 1.4421522617340088, |
|
"learning_rate": 2.791781925709473e-05, |
|
"loss": 1.7768, |
|
"step": 14942 |
|
}, |
|
{ |
|
"epoch": 0.38099963739766024, |
|
"grad_norm": 1.5021952390670776, |
|
"learning_rate": 2.7834394491598908e-05, |
|
"loss": 1.7758, |
|
"step": 14973 |
|
}, |
|
{ |
|
"epoch": 0.38178845652270715, |
|
"grad_norm": 1.462990641593933, |
|
"learning_rate": 2.7750937740346485e-05, |
|
"loss": 1.757, |
|
"step": 15004 |
|
}, |
|
{ |
|
"epoch": 0.38257727564775407, |
|
"grad_norm": 1.4034866094589233, |
|
"learning_rate": 2.7667449945135564e-05, |
|
"loss": 1.7658, |
|
"step": 15035 |
|
}, |
|
{ |
|
"epoch": 0.383366094772801, |
|
"grad_norm": 1.5529896020889282, |
|
"learning_rate": 2.7583932048114557e-05, |
|
"loss": 1.787, |
|
"step": 15066 |
|
}, |
|
{ |
|
"epoch": 0.38415491389784795, |
|
"grad_norm": 1.3766757249832153, |
|
"learning_rate": 2.7500384991771587e-05, |
|
"loss": 1.7857, |
|
"step": 15097 |
|
}, |
|
{ |
|
"epoch": 0.38494373302289486, |
|
"grad_norm": 1.3775665760040283, |
|
"learning_rate": 2.7416809718923825e-05, |
|
"loss": 1.7961, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 0.3857325521479418, |
|
"grad_norm": 1.4085272550582886, |
|
"learning_rate": 2.7333207172706864e-05, |
|
"loss": 1.7818, |
|
"step": 15159 |
|
}, |
|
{ |
|
"epoch": 0.3865213712729887, |
|
"grad_norm": 1.441758394241333, |
|
"learning_rate": 2.7249578296564088e-05, |
|
"loss": 1.7746, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 0.3873101903980356, |
|
"grad_norm": 1.4011828899383545, |
|
"learning_rate": 2.7165924034235973e-05, |
|
"loss": 1.7704, |
|
"step": 15221 |
|
}, |
|
{ |
|
"epoch": 0.3880990095230825, |
|
"grad_norm": 1.4673304557800293, |
|
"learning_rate": 2.708224532974953e-05, |
|
"loss": 1.7863, |
|
"step": 15252 |
|
}, |
|
{ |
|
"epoch": 0.38888782864812943, |
|
"grad_norm": 1.4282735586166382, |
|
"learning_rate": 2.6998543127407538e-05, |
|
"loss": 1.76, |
|
"step": 15283 |
|
}, |
|
{ |
|
"epoch": 0.38967664777317634, |
|
"grad_norm": 1.3983831405639648, |
|
"learning_rate": 2.6914818371777988e-05, |
|
"loss": 1.7803, |
|
"step": 15314 |
|
}, |
|
{ |
|
"epoch": 0.39046546689822326, |
|
"grad_norm": 1.5473729372024536, |
|
"learning_rate": 2.6831072007683373e-05, |
|
"loss": 1.7787, |
|
"step": 15345 |
|
}, |
|
{ |
|
"epoch": 0.39125428602327017, |
|
"grad_norm": 1.565489649772644, |
|
"learning_rate": 2.6747304980190018e-05, |
|
"loss": 1.755, |
|
"step": 15376 |
|
}, |
|
{ |
|
"epoch": 0.3920431051483171, |
|
"grad_norm": 1.4918326139450073, |
|
"learning_rate": 2.6663518234597453e-05, |
|
"loss": 1.8007, |
|
"step": 15407 |
|
}, |
|
{ |
|
"epoch": 0.392831924273364, |
|
"grad_norm": 1.5468804836273193, |
|
"learning_rate": 2.6579712716427696e-05, |
|
"loss": 1.7574, |
|
"step": 15438 |
|
}, |
|
{ |
|
"epoch": 0.3936207433984109, |
|
"grad_norm": 1.4871866703033447, |
|
"learning_rate": 2.6495889371414652e-05, |
|
"loss": 1.7757, |
|
"step": 15469 |
|
}, |
|
{ |
|
"epoch": 0.3944095625234578, |
|
"grad_norm": 1.5485950708389282, |
|
"learning_rate": 2.6412049145493367e-05, |
|
"loss": 1.79, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.39519838164850474, |
|
"grad_norm": 1.5302681922912598, |
|
"learning_rate": 2.632819298478939e-05, |
|
"loss": 1.779, |
|
"step": 15531 |
|
}, |
|
{ |
|
"epoch": 0.39598720077355165, |
|
"grad_norm": 1.5713484287261963, |
|
"learning_rate": 2.6244321835608105e-05, |
|
"loss": 1.7526, |
|
"step": 15562 |
|
}, |
|
{ |
|
"epoch": 0.39677601989859856, |
|
"grad_norm": 1.4450056552886963, |
|
"learning_rate": 2.6160436644424024e-05, |
|
"loss": 1.7896, |
|
"step": 15593 |
|
}, |
|
{ |
|
"epoch": 0.3975648390236455, |
|
"grad_norm": 1.5404566526412964, |
|
"learning_rate": 2.6076538357870133e-05, |
|
"loss": 1.7612, |
|
"step": 15624 |
|
}, |
|
{ |
|
"epoch": 0.3983536581486924, |
|
"grad_norm": 1.5850070714950562, |
|
"learning_rate": 2.5992627922727196e-05, |
|
"loss": 1.7588, |
|
"step": 15655 |
|
}, |
|
{ |
|
"epoch": 0.3991424772737393, |
|
"grad_norm": 1.4891109466552734, |
|
"learning_rate": 2.5908706285913066e-05, |
|
"loss": 1.768, |
|
"step": 15686 |
|
}, |
|
{ |
|
"epoch": 0.3999312963987862, |
|
"grad_norm": 1.4907901287078857, |
|
"learning_rate": 2.5824774394472008e-05, |
|
"loss": 1.7672, |
|
"step": 15717 |
|
}, |
|
{ |
|
"epoch": 0.40072011552383313, |
|
"grad_norm": 1.418935775756836, |
|
"learning_rate": 2.5740833195563996e-05, |
|
"loss": 1.7812, |
|
"step": 15748 |
|
}, |
|
{ |
|
"epoch": 0.40150893464888004, |
|
"grad_norm": 1.4996947050094604, |
|
"learning_rate": 2.5656883636454067e-05, |
|
"loss": 1.7833, |
|
"step": 15779 |
|
}, |
|
{ |
|
"epoch": 0.40229775377392696, |
|
"grad_norm": 1.5073673725128174, |
|
"learning_rate": 2.557292666450159e-05, |
|
"loss": 1.7768, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 0.40308657289897387, |
|
"grad_norm": 1.4004729986190796, |
|
"learning_rate": 2.5488963227149566e-05, |
|
"loss": 1.7688, |
|
"step": 15841 |
|
}, |
|
{ |
|
"epoch": 0.40387539202402084, |
|
"grad_norm": 1.4226566553115845, |
|
"learning_rate": 2.5404994271913983e-05, |
|
"loss": 1.7758, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.40466421114906775, |
|
"grad_norm": 1.3709113597869873, |
|
"learning_rate": 2.5321020746373085e-05, |
|
"loss": 1.7664, |
|
"step": 15903 |
|
}, |
|
{ |
|
"epoch": 0.40545303027411467, |
|
"grad_norm": 1.3796721696853638, |
|
"learning_rate": 2.52370435981567e-05, |
|
"loss": 1.7584, |
|
"step": 15934 |
|
}, |
|
{ |
|
"epoch": 0.4062418493991616, |
|
"grad_norm": 1.455452561378479, |
|
"learning_rate": 2.5153063774935533e-05, |
|
"loss": 1.7745, |
|
"step": 15965 |
|
}, |
|
{ |
|
"epoch": 0.4070306685242085, |
|
"grad_norm": 1.509347677230835, |
|
"learning_rate": 2.506908222441045e-05, |
|
"loss": 1.7763, |
|
"step": 15996 |
|
}, |
|
{ |
|
"epoch": 0.4078194876492554, |
|
"grad_norm": 1.3093947172164917, |
|
"learning_rate": 2.498509989430187e-05, |
|
"loss": 1.7565, |
|
"step": 16027 |
|
}, |
|
{ |
|
"epoch": 0.4086083067743023, |
|
"grad_norm": 1.4819965362548828, |
|
"learning_rate": 2.4901117732338958e-05, |
|
"loss": 1.7678, |
|
"step": 16058 |
|
}, |
|
{ |
|
"epoch": 0.40939712589934923, |
|
"grad_norm": 1.4977960586547852, |
|
"learning_rate": 2.481713668624899e-05, |
|
"loss": 1.7673, |
|
"step": 16089 |
|
}, |
|
{ |
|
"epoch": 0.41018594502439615, |
|
"grad_norm": 1.4152425527572632, |
|
"learning_rate": 2.4733157703746663e-05, |
|
"loss": 1.759, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.41097476414944306, |
|
"grad_norm": 1.3504704236984253, |
|
"learning_rate": 2.4649181732523392e-05, |
|
"loss": 1.773, |
|
"step": 16151 |
|
}, |
|
{ |
|
"epoch": 0.41176358327449, |
|
"grad_norm": 1.3932607173919678, |
|
"learning_rate": 2.4565209720236582e-05, |
|
"loss": 1.7724, |
|
"step": 16182 |
|
}, |
|
{ |
|
"epoch": 0.4125524023995369, |
|
"grad_norm": 1.423255443572998, |
|
"learning_rate": 2.4481242614498975e-05, |
|
"loss": 1.7504, |
|
"step": 16213 |
|
}, |
|
{ |
|
"epoch": 0.4133412215245838, |
|
"grad_norm": 1.5146458148956299, |
|
"learning_rate": 2.439728136286796e-05, |
|
"loss": 1.7572, |
|
"step": 16244 |
|
}, |
|
{ |
|
"epoch": 0.4141300406496307, |
|
"grad_norm": 1.4159959554672241, |
|
"learning_rate": 2.4313326912834852e-05, |
|
"loss": 1.7495, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 0.41491885977467763, |
|
"grad_norm": 1.4505484104156494, |
|
"learning_rate": 2.4229380211814206e-05, |
|
"loss": 1.7748, |
|
"step": 16306 |
|
}, |
|
{ |
|
"epoch": 0.41570767889972454, |
|
"grad_norm": 1.4519730806350708, |
|
"learning_rate": 2.4145442207133124e-05, |
|
"loss": 1.7635, |
|
"step": 16337 |
|
}, |
|
{ |
|
"epoch": 0.41649649802477146, |
|
"grad_norm": 1.452431082725525, |
|
"learning_rate": 2.406151384602059e-05, |
|
"loss": 1.7624, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 0.41728531714981837, |
|
"grad_norm": 1.4315119981765747, |
|
"learning_rate": 2.3977596075596747e-05, |
|
"loss": 1.7765, |
|
"step": 16399 |
|
}, |
|
{ |
|
"epoch": 0.4180741362748653, |
|
"grad_norm": 1.4047067165374756, |
|
"learning_rate": 2.3893689842862223e-05, |
|
"loss": 1.755, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 0.4188629553999122, |
|
"grad_norm": 1.426621913909912, |
|
"learning_rate": 2.3809796094687475e-05, |
|
"loss": 1.7598, |
|
"step": 16461 |
|
}, |
|
{ |
|
"epoch": 0.4196517745249591, |
|
"grad_norm": 1.4108635187149048, |
|
"learning_rate": 2.372591577780202e-05, |
|
"loss": 1.7652, |
|
"step": 16492 |
|
}, |
|
{ |
|
"epoch": 0.420440593650006, |
|
"grad_norm": 1.4988287687301636, |
|
"learning_rate": 2.3642049838783838e-05, |
|
"loss": 1.7763, |
|
"step": 16523 |
|
}, |
|
{ |
|
"epoch": 0.42122941277505294, |
|
"grad_norm": 1.4525630474090576, |
|
"learning_rate": 2.3558199224048666e-05, |
|
"loss": 1.7607, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 0.42201823190009985, |
|
"grad_norm": 1.512402892112732, |
|
"learning_rate": 2.347436487983929e-05, |
|
"loss": 1.7625, |
|
"step": 16585 |
|
}, |
|
{ |
|
"epoch": 0.42280705102514676, |
|
"grad_norm": 1.4328192472457886, |
|
"learning_rate": 2.3390547752214888e-05, |
|
"loss": 1.7598, |
|
"step": 16616 |
|
}, |
|
{ |
|
"epoch": 0.42359587015019373, |
|
"grad_norm": 1.4110822677612305, |
|
"learning_rate": 2.330674878704035e-05, |
|
"loss": 1.757, |
|
"step": 16647 |
|
}, |
|
{ |
|
"epoch": 0.42438468927524065, |
|
"grad_norm": 1.4538228511810303, |
|
"learning_rate": 2.322296892997561e-05, |
|
"loss": 1.7503, |
|
"step": 16678 |
|
}, |
|
{ |
|
"epoch": 0.42517350840028756, |
|
"grad_norm": 1.4495991468429565, |
|
"learning_rate": 2.313920912646497e-05, |
|
"loss": 1.7593, |
|
"step": 16709 |
|
}, |
|
{ |
|
"epoch": 0.42596232752533447, |
|
"grad_norm": 1.5201659202575684, |
|
"learning_rate": 2.305547032172643e-05, |
|
"loss": 1.7512, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.4267511466503814, |
|
"grad_norm": 1.4683400392532349, |
|
"learning_rate": 2.2971753460741014e-05, |
|
"loss": 1.7792, |
|
"step": 16771 |
|
}, |
|
{ |
|
"epoch": 0.4275399657754283, |
|
"grad_norm": 1.4335435628890991, |
|
"learning_rate": 2.288805948824212e-05, |
|
"loss": 1.7495, |
|
"step": 16802 |
|
}, |
|
{ |
|
"epoch": 0.4283287849004752, |
|
"grad_norm": 1.494997501373291, |
|
"learning_rate": 2.2804389348704858e-05, |
|
"loss": 1.7806, |
|
"step": 16833 |
|
}, |
|
{ |
|
"epoch": 0.4291176040255221, |
|
"grad_norm": 1.5237140655517578, |
|
"learning_rate": 2.2720743986335374e-05, |
|
"loss": 1.7348, |
|
"step": 16864 |
|
}, |
|
{ |
|
"epoch": 0.42990642315056904, |
|
"grad_norm": 1.4462862014770508, |
|
"learning_rate": 2.2637124345060233e-05, |
|
"loss": 1.7663, |
|
"step": 16895 |
|
}, |
|
{ |
|
"epoch": 0.43069524227561595, |
|
"grad_norm": 1.4371618032455444, |
|
"learning_rate": 2.2553531368515695e-05, |
|
"loss": 1.7699, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 0.43148406140066287, |
|
"grad_norm": 1.4182896614074707, |
|
"learning_rate": 2.2469966000037144e-05, |
|
"loss": 1.7651, |
|
"step": 16957 |
|
}, |
|
{ |
|
"epoch": 0.4322728805257098, |
|
"grad_norm": 1.3784195184707642, |
|
"learning_rate": 2.2386429182648417e-05, |
|
"loss": 1.7596, |
|
"step": 16988 |
|
}, |
|
{ |
|
"epoch": 0.4330616996507567, |
|
"grad_norm": 1.398327350616455, |
|
"learning_rate": 2.230292185905114e-05, |
|
"loss": 1.7588, |
|
"step": 17019 |
|
}, |
|
{ |
|
"epoch": 0.4338505187758036, |
|
"grad_norm": 1.4239211082458496, |
|
"learning_rate": 2.2219444971614116e-05, |
|
"loss": 1.7656, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.4346393379008505, |
|
"grad_norm": 1.4027754068374634, |
|
"learning_rate": 2.2135999462362655e-05, |
|
"loss": 1.7485, |
|
"step": 17081 |
|
}, |
|
{ |
|
"epoch": 0.43542815702589743, |
|
"grad_norm": 1.442612648010254, |
|
"learning_rate": 2.2052586272968003e-05, |
|
"loss": 1.7682, |
|
"step": 17112 |
|
}, |
|
{ |
|
"epoch": 0.43621697615094435, |
|
"grad_norm": 1.3537038564682007, |
|
"learning_rate": 2.196920634473666e-05, |
|
"loss": 1.7511, |
|
"step": 17143 |
|
}, |
|
{ |
|
"epoch": 0.43700579527599126, |
|
"grad_norm": 1.3696125745773315, |
|
"learning_rate": 2.1885860618599787e-05, |
|
"loss": 1.767, |
|
"step": 17174 |
|
}, |
|
{ |
|
"epoch": 0.4377946144010382, |
|
"grad_norm": 1.5365840196609497, |
|
"learning_rate": 2.1802550035102577e-05, |
|
"loss": 1.7527, |
|
"step": 17205 |
|
}, |
|
{ |
|
"epoch": 0.4385834335260851, |
|
"grad_norm": 1.4375520944595337, |
|
"learning_rate": 2.171927553439363e-05, |
|
"loss": 1.7577, |
|
"step": 17236 |
|
}, |
|
{ |
|
"epoch": 0.439372252651132, |
|
"grad_norm": 1.4054752588272095, |
|
"learning_rate": 2.1636038056214376e-05, |
|
"loss": 1.7479, |
|
"step": 17267 |
|
}, |
|
{ |
|
"epoch": 0.4401610717761789, |
|
"grad_norm": 1.4836634397506714, |
|
"learning_rate": 2.155283853988844e-05, |
|
"loss": 1.7463, |
|
"step": 17298 |
|
}, |
|
{ |
|
"epoch": 0.4409498909012258, |
|
"grad_norm": 1.4966789484024048, |
|
"learning_rate": 2.146967792431106e-05, |
|
"loss": 1.7539, |
|
"step": 17329 |
|
}, |
|
{ |
|
"epoch": 0.44173871002627274, |
|
"grad_norm": 1.3743985891342163, |
|
"learning_rate": 2.138655714793849e-05, |
|
"loss": 1.7501, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.44252752915131965, |
|
"grad_norm": 1.4786440134048462, |
|
"learning_rate": 2.1303477148777367e-05, |
|
"loss": 1.7438, |
|
"step": 17391 |
|
}, |
|
{ |
|
"epoch": 0.4433163482763666, |
|
"grad_norm": 1.3931723833084106, |
|
"learning_rate": 2.122043886437421e-05, |
|
"loss": 1.7569, |
|
"step": 17422 |
|
}, |
|
{ |
|
"epoch": 0.44410516740141354, |
|
"grad_norm": 1.457221508026123, |
|
"learning_rate": 2.1137443231804765e-05, |
|
"loss": 1.7459, |
|
"step": 17453 |
|
}, |
|
{ |
|
"epoch": 0.44489398652646045, |
|
"grad_norm": 1.4700186252593994, |
|
"learning_rate": 2.105449118766347e-05, |
|
"loss": 1.749, |
|
"step": 17484 |
|
}, |
|
{ |
|
"epoch": 0.44568280565150736, |
|
"grad_norm": 1.4787609577178955, |
|
"learning_rate": 2.097158366805287e-05, |
|
"loss": 1.7433, |
|
"step": 17515 |
|
}, |
|
{ |
|
"epoch": 0.4464716247765543, |
|
"grad_norm": 1.435116171836853, |
|
"learning_rate": 2.0888721608573047e-05, |
|
"loss": 1.7492, |
|
"step": 17546 |
|
}, |
|
{ |
|
"epoch": 0.4472604439016012, |
|
"grad_norm": 1.3931212425231934, |
|
"learning_rate": 2.0805905944311087e-05, |
|
"loss": 1.7698, |
|
"step": 17577 |
|
}, |
|
{ |
|
"epoch": 0.4480492630266481, |
|
"grad_norm": 1.4713780879974365, |
|
"learning_rate": 2.0723137609830497e-05, |
|
"loss": 1.7599, |
|
"step": 17608 |
|
}, |
|
{ |
|
"epoch": 0.448838082151695, |
|
"grad_norm": 1.3709975481033325, |
|
"learning_rate": 2.0640417539160686e-05, |
|
"loss": 1.7615, |
|
"step": 17639 |
|
}, |
|
{ |
|
"epoch": 0.44962690127674193, |
|
"grad_norm": 1.4991896152496338, |
|
"learning_rate": 2.0557746665786427e-05, |
|
"loss": 1.7541, |
|
"step": 17670 |
|
}, |
|
{ |
|
"epoch": 0.45041572040178884, |
|
"grad_norm": 1.4068297147750854, |
|
"learning_rate": 2.0475125922637256e-05, |
|
"loss": 1.7383, |
|
"step": 17701 |
|
}, |
|
{ |
|
"epoch": 0.45120453952683576, |
|
"grad_norm": 1.4071435928344727, |
|
"learning_rate": 2.0392556242077047e-05, |
|
"loss": 1.754, |
|
"step": 17732 |
|
}, |
|
{ |
|
"epoch": 0.45199335865188267, |
|
"grad_norm": 1.5196751356124878, |
|
"learning_rate": 2.031003855589343e-05, |
|
"loss": 1.7406, |
|
"step": 17763 |
|
}, |
|
{ |
|
"epoch": 0.4527821777769296, |
|
"grad_norm": 1.4674859046936035, |
|
"learning_rate": 2.022757379528727e-05, |
|
"loss": 1.7496, |
|
"step": 17794 |
|
}, |
|
{ |
|
"epoch": 0.4535709969019765, |
|
"grad_norm": 1.377008318901062, |
|
"learning_rate": 2.0145162890862184e-05, |
|
"loss": 1.7573, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 0.4543598160270234, |
|
"grad_norm": 1.3753769397735596, |
|
"learning_rate": 2.0062806772614022e-05, |
|
"loss": 1.7312, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.4551486351520703, |
|
"grad_norm": 1.3770841360092163, |
|
"learning_rate": 1.9980506369920392e-05, |
|
"loss": 1.7378, |
|
"step": 17887 |
|
}, |
|
{ |
|
"epoch": 0.45593745427711724, |
|
"grad_norm": 1.644900918006897, |
|
"learning_rate": 1.989826261153015e-05, |
|
"loss": 1.7423, |
|
"step": 17918 |
|
}, |
|
{ |
|
"epoch": 0.45672627340216415, |
|
"grad_norm": 1.423464059829712, |
|
"learning_rate": 1.9816076425552923e-05, |
|
"loss": 1.745, |
|
"step": 17949 |
|
}, |
|
{ |
|
"epoch": 0.45751509252721106, |
|
"grad_norm": 1.4013458490371704, |
|
"learning_rate": 1.9733948739448676e-05, |
|
"loss": 1.7444, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.458303911652258, |
|
"grad_norm": 1.4134126901626587, |
|
"learning_rate": 1.9651880480017155e-05, |
|
"loss": 1.7318, |
|
"step": 18011 |
|
}, |
|
{ |
|
"epoch": 0.4590927307773049, |
|
"grad_norm": 1.389404535293579, |
|
"learning_rate": 1.9569872573387516e-05, |
|
"loss": 1.7531, |
|
"step": 18042 |
|
}, |
|
{ |
|
"epoch": 0.4598815499023518, |
|
"grad_norm": 1.4825111627578735, |
|
"learning_rate": 1.9487925945007854e-05, |
|
"loss": 1.7178, |
|
"step": 18073 |
|
}, |
|
{ |
|
"epoch": 0.4606703690273987, |
|
"grad_norm": 1.335856556892395, |
|
"learning_rate": 1.9406041519634726e-05, |
|
"loss": 1.7569, |
|
"step": 18104 |
|
}, |
|
{ |
|
"epoch": 0.46145918815244563, |
|
"grad_norm": 1.5451414585113525, |
|
"learning_rate": 1.932422022132275e-05, |
|
"loss": 1.7608, |
|
"step": 18135 |
|
}, |
|
{ |
|
"epoch": 0.4622480072774926, |
|
"grad_norm": 1.458856225013733, |
|
"learning_rate": 1.924246297341414e-05, |
|
"loss": 1.7381, |
|
"step": 18166 |
|
}, |
|
{ |
|
"epoch": 0.4630368264025395, |
|
"grad_norm": 1.5138990879058838, |
|
"learning_rate": 1.9160770698528338e-05, |
|
"loss": 1.7505, |
|
"step": 18197 |
|
}, |
|
{ |
|
"epoch": 0.4638256455275864, |
|
"grad_norm": 1.433817744255066, |
|
"learning_rate": 1.907914431855156e-05, |
|
"loss": 1.7406, |
|
"step": 18228 |
|
}, |
|
{ |
|
"epoch": 0.46461446465263334, |
|
"grad_norm": 1.3636925220489502, |
|
"learning_rate": 1.8997584754626412e-05, |
|
"loss": 1.7533, |
|
"step": 18259 |
|
}, |
|
{ |
|
"epoch": 0.46540328377768025, |
|
"grad_norm": 1.4643160104751587, |
|
"learning_rate": 1.8916092927141486e-05, |
|
"loss": 1.7329, |
|
"step": 18290 |
|
}, |
|
{ |
|
"epoch": 0.46619210290272717, |
|
"grad_norm": 1.3883280754089355, |
|
"learning_rate": 1.883466975572098e-05, |
|
"loss": 1.7386, |
|
"step": 18321 |
|
}, |
|
{ |
|
"epoch": 0.4669809220277741, |
|
"grad_norm": 1.4294878244400024, |
|
"learning_rate": 1.8753316159214312e-05, |
|
"loss": 1.7553, |
|
"step": 18352 |
|
}, |
|
{ |
|
"epoch": 0.467769741152821, |
|
"grad_norm": 1.4422011375427246, |
|
"learning_rate": 1.8672033055685766e-05, |
|
"loss": 1.7333, |
|
"step": 18383 |
|
}, |
|
{ |
|
"epoch": 0.4685585602778679, |
|
"grad_norm": 1.4707412719726562, |
|
"learning_rate": 1.8590821362404116e-05, |
|
"loss": 1.7463, |
|
"step": 18414 |
|
}, |
|
{ |
|
"epoch": 0.4693473794029148, |
|
"grad_norm": 1.4446028470993042, |
|
"learning_rate": 1.8509681995832294e-05, |
|
"loss": 1.7262, |
|
"step": 18445 |
|
}, |
|
{ |
|
"epoch": 0.47013619852796174, |
|
"grad_norm": 1.3790693283081055, |
|
"learning_rate": 1.8428615871617004e-05, |
|
"loss": 1.7442, |
|
"step": 18476 |
|
}, |
|
{ |
|
"epoch": 0.47092501765300865, |
|
"grad_norm": 1.4067668914794922, |
|
"learning_rate": 1.8347623904578448e-05, |
|
"loss": 1.731, |
|
"step": 18507 |
|
}, |
|
{ |
|
"epoch": 0.47171383677805556, |
|
"grad_norm": 1.496756672859192, |
|
"learning_rate": 1.8266707008699975e-05, |
|
"loss": 1.7546, |
|
"step": 18538 |
|
}, |
|
{ |
|
"epoch": 0.4725026559031025, |
|
"grad_norm": 1.4508312940597534, |
|
"learning_rate": 1.818586609711774e-05, |
|
"loss": 1.748, |
|
"step": 18569 |
|
}, |
|
{ |
|
"epoch": 0.4732914750281494, |
|
"grad_norm": 1.4680043458938599, |
|
"learning_rate": 1.8105102082110462e-05, |
|
"loss": 1.7334, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.4740802941531963, |
|
"grad_norm": 1.512750267982483, |
|
"learning_rate": 1.8024415875089058e-05, |
|
"loss": 1.7437, |
|
"step": 18631 |
|
}, |
|
{ |
|
"epoch": 0.4748691132782432, |
|
"grad_norm": 1.4424457550048828, |
|
"learning_rate": 1.7943808386586407e-05, |
|
"loss": 1.7454, |
|
"step": 18662 |
|
}, |
|
{ |
|
"epoch": 0.47565793240329013, |
|
"grad_norm": 1.47055983543396, |
|
"learning_rate": 1.7863280526247073e-05, |
|
"loss": 1.7132, |
|
"step": 18693 |
|
}, |
|
{ |
|
"epoch": 0.47644675152833704, |
|
"grad_norm": 1.3706848621368408, |
|
"learning_rate": 1.7782833202817003e-05, |
|
"loss": 1.7329, |
|
"step": 18724 |
|
}, |
|
{ |
|
"epoch": 0.47723557065338396, |
|
"grad_norm": 1.3911017179489136, |
|
"learning_rate": 1.7702467324133327e-05, |
|
"loss": 1.7322, |
|
"step": 18755 |
|
}, |
|
{ |
|
"epoch": 0.47802438977843087, |
|
"grad_norm": 1.3935508728027344, |
|
"learning_rate": 1.7622183797114042e-05, |
|
"loss": 1.7463, |
|
"step": 18786 |
|
}, |
|
{ |
|
"epoch": 0.4788132089034778, |
|
"grad_norm": 1.3628978729248047, |
|
"learning_rate": 1.7541983527747838e-05, |
|
"loss": 1.7529, |
|
"step": 18817 |
|
}, |
|
{ |
|
"epoch": 0.4796020280285247, |
|
"grad_norm": 1.3993347883224487, |
|
"learning_rate": 1.746186742108387e-05, |
|
"loss": 1.7517, |
|
"step": 18848 |
|
}, |
|
{ |
|
"epoch": 0.4803908471535716, |
|
"grad_norm": 1.4427608251571655, |
|
"learning_rate": 1.73818363812215e-05, |
|
"loss": 1.7499, |
|
"step": 18879 |
|
}, |
|
{ |
|
"epoch": 0.4811796662786185, |
|
"grad_norm": 1.4312776327133179, |
|
"learning_rate": 1.7301891311300153e-05, |
|
"loss": 1.7471, |
|
"step": 18910 |
|
}, |
|
{ |
|
"epoch": 0.4819684854036655, |
|
"grad_norm": 1.373559832572937, |
|
"learning_rate": 1.7222033113489055e-05, |
|
"loss": 1.7555, |
|
"step": 18941 |
|
}, |
|
{ |
|
"epoch": 0.4827573045287124, |
|
"grad_norm": 1.383086085319519, |
|
"learning_rate": 1.7142262688977127e-05, |
|
"loss": 1.7446, |
|
"step": 18972 |
|
}, |
|
{ |
|
"epoch": 0.4835461236537593, |
|
"grad_norm": 1.459486722946167, |
|
"learning_rate": 1.7062580937962764e-05, |
|
"loss": 1.7523, |
|
"step": 19003 |
|
}, |
|
{ |
|
"epoch": 0.48433494277880623, |
|
"grad_norm": 1.5249037742614746, |
|
"learning_rate": 1.698298875964369e-05, |
|
"loss": 1.7249, |
|
"step": 19034 |
|
}, |
|
{ |
|
"epoch": 0.48512376190385315, |
|
"grad_norm": 1.431281566619873, |
|
"learning_rate": 1.690348705220684e-05, |
|
"loss": 1.7133, |
|
"step": 19065 |
|
}, |
|
{ |
|
"epoch": 0.48591258102890006, |
|
"grad_norm": 1.4861342906951904, |
|
"learning_rate": 1.6824076712818156e-05, |
|
"loss": 1.7377, |
|
"step": 19096 |
|
}, |
|
{ |
|
"epoch": 0.486701400153947, |
|
"grad_norm": 1.3854913711547852, |
|
"learning_rate": 1.6744758637612533e-05, |
|
"loss": 1.7292, |
|
"step": 19127 |
|
}, |
|
{ |
|
"epoch": 0.4874902192789939, |
|
"grad_norm": 1.482332468032837, |
|
"learning_rate": 1.6665533721683664e-05, |
|
"loss": 1.7505, |
|
"step": 19158 |
|
}, |
|
{ |
|
"epoch": 0.4882790384040408, |
|
"grad_norm": 1.3565430641174316, |
|
"learning_rate": 1.6586402859073974e-05, |
|
"loss": 1.742, |
|
"step": 19189 |
|
}, |
|
{ |
|
"epoch": 0.4890678575290877, |
|
"grad_norm": 1.445395588874817, |
|
"learning_rate": 1.6507366942764463e-05, |
|
"loss": 1.7387, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 0.4898566766541346, |
|
"grad_norm": 1.4928544759750366, |
|
"learning_rate": 1.6428426864664732e-05, |
|
"loss": 1.762, |
|
"step": 19251 |
|
}, |
|
{ |
|
"epoch": 0.49064549577918154, |
|
"grad_norm": 1.38858962059021, |
|
"learning_rate": 1.6349583515602816e-05, |
|
"loss": 1.7486, |
|
"step": 19282 |
|
}, |
|
{ |
|
"epoch": 0.49143431490422845, |
|
"grad_norm": 1.3937194347381592, |
|
"learning_rate": 1.6270837785315208e-05, |
|
"loss": 1.7542, |
|
"step": 19313 |
|
}, |
|
{ |
|
"epoch": 0.49222313402927537, |
|
"grad_norm": 1.501042127609253, |
|
"learning_rate": 1.619219056243676e-05, |
|
"loss": 1.7274, |
|
"step": 19344 |
|
}, |
|
{ |
|
"epoch": 0.4930119531543223, |
|
"grad_norm": 1.3143610954284668, |
|
"learning_rate": 1.6113642734490698e-05, |
|
"loss": 1.7137, |
|
"step": 19375 |
|
}, |
|
{ |
|
"epoch": 0.4938007722793692, |
|
"grad_norm": 1.4225116968154907, |
|
"learning_rate": 1.6035195187878577e-05, |
|
"loss": 1.7414, |
|
"step": 19406 |
|
}, |
|
{ |
|
"epoch": 0.4945895914044161, |
|
"grad_norm": 1.4575517177581787, |
|
"learning_rate": 1.5956848807870305e-05, |
|
"loss": 1.7044, |
|
"step": 19437 |
|
}, |
|
{ |
|
"epoch": 0.495378410529463, |
|
"grad_norm": 1.4163532257080078, |
|
"learning_rate": 1.587860447859413e-05, |
|
"loss": 1.7365, |
|
"step": 19468 |
|
}, |
|
{ |
|
"epoch": 0.49616722965450993, |
|
"grad_norm": 1.504955530166626, |
|
"learning_rate": 1.5800463083026686e-05, |
|
"loss": 1.759, |
|
"step": 19499 |
|
}, |
|
{ |
|
"epoch": 0.49695604877955685, |
|
"grad_norm": 1.4385664463043213, |
|
"learning_rate": 1.572242550298298e-05, |
|
"loss": 1.7193, |
|
"step": 19530 |
|
}, |
|
{ |
|
"epoch": 0.49774486790460376, |
|
"grad_norm": 1.384371280670166, |
|
"learning_rate": 1.56444926191065e-05, |
|
"loss": 1.7224, |
|
"step": 19561 |
|
}, |
|
{ |
|
"epoch": 0.4985336870296507, |
|
"grad_norm": 1.392520785331726, |
|
"learning_rate": 1.5566665310859257e-05, |
|
"loss": 1.7246, |
|
"step": 19592 |
|
}, |
|
{ |
|
"epoch": 0.4993225061546976, |
|
"grad_norm": 1.421629786491394, |
|
"learning_rate": 1.5488944456511846e-05, |
|
"loss": 1.7314, |
|
"step": 19623 |
|
}, |
|
{ |
|
"epoch": 0.5001113252797446, |
|
"grad_norm": 1.405013918876648, |
|
"learning_rate": 1.5411330933133546e-05, |
|
"loss": 1.7329, |
|
"step": 19654 |
|
}, |
|
{ |
|
"epoch": 0.5009001444047915, |
|
"grad_norm": 1.372490406036377, |
|
"learning_rate": 1.533382561658241e-05, |
|
"loss": 1.7253, |
|
"step": 19685 |
|
}, |
|
{ |
|
"epoch": 0.5016889635298384, |
|
"grad_norm": 1.4861042499542236, |
|
"learning_rate": 1.525642938149541e-05, |
|
"loss": 1.7317, |
|
"step": 19716 |
|
}, |
|
{ |
|
"epoch": 0.5024777826548853, |
|
"grad_norm": 1.4108079671859741, |
|
"learning_rate": 1.5179143101278536e-05, |
|
"loss": 1.7391, |
|
"step": 19747 |
|
}, |
|
{ |
|
"epoch": 0.5032666017799322, |
|
"grad_norm": 1.4616518020629883, |
|
"learning_rate": 1.5101967648096955e-05, |
|
"loss": 1.7129, |
|
"step": 19778 |
|
}, |
|
{ |
|
"epoch": 0.5040554209049791, |
|
"grad_norm": 1.3660775423049927, |
|
"learning_rate": 1.5024903892865172e-05, |
|
"loss": 1.7149, |
|
"step": 19809 |
|
}, |
|
{ |
|
"epoch": 0.504844240030026, |
|
"grad_norm": 1.4286696910858154, |
|
"learning_rate": 1.4947952705237184e-05, |
|
"loss": 1.6982, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.505633059155073, |
|
"grad_norm": 1.4002240896224976, |
|
"learning_rate": 1.4871114953596682e-05, |
|
"loss": 1.7319, |
|
"step": 19871 |
|
}, |
|
{ |
|
"epoch": 0.5064218782801199, |
|
"grad_norm": 1.4784040451049805, |
|
"learning_rate": 1.4794391505047256e-05, |
|
"loss": 1.7193, |
|
"step": 19902 |
|
}, |
|
{ |
|
"epoch": 0.5072106974051668, |
|
"grad_norm": 1.4451029300689697, |
|
"learning_rate": 1.4717783225402596e-05, |
|
"loss": 1.7225, |
|
"step": 19933 |
|
}, |
|
{ |
|
"epoch": 0.5079995165302137, |
|
"grad_norm": 1.438502550125122, |
|
"learning_rate": 1.4641290979176735e-05, |
|
"loss": 1.7186, |
|
"step": 19964 |
|
}, |
|
{ |
|
"epoch": 0.5087883356552606, |
|
"grad_norm": 1.4443246126174927, |
|
"learning_rate": 1.4564915629574246e-05, |
|
"loss": 1.7242, |
|
"step": 19995 |
|
}, |
|
{ |
|
"epoch": 0.5095771547803075, |
|
"grad_norm": 1.5206542015075684, |
|
"learning_rate": 1.4488658038480601e-05, |
|
"loss": 1.7406, |
|
"step": 20026 |
|
}, |
|
{ |
|
"epoch": 0.5103659739053544, |
|
"grad_norm": 1.4452012777328491, |
|
"learning_rate": 1.4412519066452323e-05, |
|
"loss": 1.7218, |
|
"step": 20057 |
|
}, |
|
{ |
|
"epoch": 0.5111547930304013, |
|
"grad_norm": 1.4169068336486816, |
|
"learning_rate": 1.4336499572707373e-05, |
|
"loss": 1.7365, |
|
"step": 20088 |
|
}, |
|
{ |
|
"epoch": 0.5119436121554483, |
|
"grad_norm": 1.475844383239746, |
|
"learning_rate": 1.4260600415115433e-05, |
|
"loss": 1.7264, |
|
"step": 20119 |
|
}, |
|
{ |
|
"epoch": 0.5127324312804952, |
|
"grad_norm": 1.4148321151733398, |
|
"learning_rate": 1.4184822450188137e-05, |
|
"loss": 1.7348, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.5135212504055421, |
|
"grad_norm": 1.4532842636108398, |
|
"learning_rate": 1.410916653306954e-05, |
|
"loss": 1.7021, |
|
"step": 20181 |
|
}, |
|
{ |
|
"epoch": 0.514310069530589, |
|
"grad_norm": 1.4390312433242798, |
|
"learning_rate": 1.403363351752639e-05, |
|
"loss": 1.7205, |
|
"step": 20212 |
|
}, |
|
{ |
|
"epoch": 0.5150988886556359, |
|
"grad_norm": 1.4490697383880615, |
|
"learning_rate": 1.3958224255938485e-05, |
|
"loss": 1.7235, |
|
"step": 20243 |
|
}, |
|
{ |
|
"epoch": 0.5158877077806828, |
|
"grad_norm": 1.4487396478652954, |
|
"learning_rate": 1.388293959928911e-05, |
|
"loss": 1.7325, |
|
"step": 20274 |
|
}, |
|
{ |
|
"epoch": 0.5166765269057297, |
|
"grad_norm": 1.3987274169921875, |
|
"learning_rate": 1.3807780397155379e-05, |
|
"loss": 1.707, |
|
"step": 20305 |
|
}, |
|
{ |
|
"epoch": 0.5174653460307767, |
|
"grad_norm": 1.4041749238967896, |
|
"learning_rate": 1.3732747497698655e-05, |
|
"loss": 1.7179, |
|
"step": 20336 |
|
}, |
|
{ |
|
"epoch": 0.5182541651558236, |
|
"grad_norm": 1.442674994468689, |
|
"learning_rate": 1.3657841747655038e-05, |
|
"loss": 1.7382, |
|
"step": 20367 |
|
}, |
|
{ |
|
"epoch": 0.5190429842808705, |
|
"grad_norm": 1.4303447008132935, |
|
"learning_rate": 1.3583063992325706e-05, |
|
"loss": 1.7375, |
|
"step": 20398 |
|
}, |
|
{ |
|
"epoch": 0.5198318034059174, |
|
"grad_norm": 1.4175372123718262, |
|
"learning_rate": 1.3508415075567496e-05, |
|
"loss": 1.7126, |
|
"step": 20429 |
|
}, |
|
{ |
|
"epoch": 0.5206206225309643, |
|
"grad_norm": 1.4764102697372437, |
|
"learning_rate": 1.343389583978327e-05, |
|
"loss": 1.7372, |
|
"step": 20460 |
|
}, |
|
{ |
|
"epoch": 0.5214094416560112, |
|
"grad_norm": 1.4597842693328857, |
|
"learning_rate": 1.3359507125912468e-05, |
|
"loss": 1.7247, |
|
"step": 20491 |
|
}, |
|
{ |
|
"epoch": 0.5221982607810581, |
|
"grad_norm": 1.4972703456878662, |
|
"learning_rate": 1.3285249773421627e-05, |
|
"loss": 1.7158, |
|
"step": 20522 |
|
}, |
|
{ |
|
"epoch": 0.522987079906105, |
|
"grad_norm": 1.4102123975753784, |
|
"learning_rate": 1.3211124620294884e-05, |
|
"loss": 1.74, |
|
"step": 20553 |
|
}, |
|
{ |
|
"epoch": 0.523775899031152, |
|
"grad_norm": 1.3836309909820557, |
|
"learning_rate": 1.313713250302451e-05, |
|
"loss": 1.733, |
|
"step": 20584 |
|
}, |
|
{ |
|
"epoch": 0.5245647181561989, |
|
"grad_norm": 1.4065951108932495, |
|
"learning_rate": 1.3063274256601479e-05, |
|
"loss": 1.7291, |
|
"step": 20615 |
|
}, |
|
{ |
|
"epoch": 0.5253535372812458, |
|
"grad_norm": 1.4294134378433228, |
|
"learning_rate": 1.2989550714506086e-05, |
|
"loss": 1.7196, |
|
"step": 20646 |
|
}, |
|
{ |
|
"epoch": 0.5261423564062927, |
|
"grad_norm": 1.438848853111267, |
|
"learning_rate": 1.291596270869846e-05, |
|
"loss": 1.7294, |
|
"step": 20677 |
|
}, |
|
{ |
|
"epoch": 0.5269311755313396, |
|
"grad_norm": 1.3648425340652466, |
|
"learning_rate": 1.284251106960927e-05, |
|
"loss": 1.725, |
|
"step": 20708 |
|
}, |
|
{ |
|
"epoch": 0.5277199946563865, |
|
"grad_norm": 1.4666975736618042, |
|
"learning_rate": 1.2769196626130263e-05, |
|
"loss": 1.7272, |
|
"step": 20739 |
|
}, |
|
{ |
|
"epoch": 0.5285088137814334, |
|
"grad_norm": 1.4472864866256714, |
|
"learning_rate": 1.2696020205604969e-05, |
|
"loss": 1.7216, |
|
"step": 20770 |
|
}, |
|
{ |
|
"epoch": 0.5292976329064804, |
|
"grad_norm": 1.4326000213623047, |
|
"learning_rate": 1.2622982633819359e-05, |
|
"loss": 1.7263, |
|
"step": 20801 |
|
}, |
|
{ |
|
"epoch": 0.5300864520315273, |
|
"grad_norm": 1.468807578086853, |
|
"learning_rate": 1.2550084734992484e-05, |
|
"loss": 1.7366, |
|
"step": 20832 |
|
}, |
|
{ |
|
"epoch": 0.5308752711565742, |
|
"grad_norm": 1.3874242305755615, |
|
"learning_rate": 1.247732733176724e-05, |
|
"loss": 1.7235, |
|
"step": 20863 |
|
}, |
|
{ |
|
"epoch": 0.5316640902816212, |
|
"grad_norm": 1.4644588232040405, |
|
"learning_rate": 1.2404711245201044e-05, |
|
"loss": 1.7363, |
|
"step": 20894 |
|
}, |
|
{ |
|
"epoch": 0.5324529094066681, |
|
"grad_norm": 1.4458835124969482, |
|
"learning_rate": 1.2332237294756535e-05, |
|
"loss": 1.7062, |
|
"step": 20925 |
|
}, |
|
{ |
|
"epoch": 0.533241728531715, |
|
"grad_norm": 1.4956963062286377, |
|
"learning_rate": 1.225990629829241e-05, |
|
"loss": 1.7244, |
|
"step": 20956 |
|
}, |
|
{ |
|
"epoch": 0.534030547656762, |
|
"grad_norm": 1.4594619274139404, |
|
"learning_rate": 1.2187719072054136e-05, |
|
"loss": 1.7074, |
|
"step": 20987 |
|
}, |
|
{ |
|
"epoch": 0.5348193667818089, |
|
"grad_norm": 1.4499660730361938, |
|
"learning_rate": 1.2115676430664735e-05, |
|
"loss": 1.7154, |
|
"step": 21018 |
|
}, |
|
{ |
|
"epoch": 0.5356081859068558, |
|
"grad_norm": 1.5303255319595337, |
|
"learning_rate": 1.2043779187115647e-05, |
|
"loss": 1.7284, |
|
"step": 21049 |
|
}, |
|
{ |
|
"epoch": 0.5363970050319027, |
|
"grad_norm": 1.3913129568099976, |
|
"learning_rate": 1.1972028152757476e-05, |
|
"loss": 1.7328, |
|
"step": 21080 |
|
}, |
|
{ |
|
"epoch": 0.5371858241569496, |
|
"grad_norm": 1.4718728065490723, |
|
"learning_rate": 1.1900424137290889e-05, |
|
"loss": 1.7208, |
|
"step": 21111 |
|
}, |
|
{ |
|
"epoch": 0.5379746432819965, |
|
"grad_norm": 1.3919767141342163, |
|
"learning_rate": 1.1828967948757482e-05, |
|
"loss": 1.7143, |
|
"step": 21142 |
|
}, |
|
{ |
|
"epoch": 0.5387634624070434, |
|
"grad_norm": 1.4659541845321655, |
|
"learning_rate": 1.175766039353062e-05, |
|
"loss": 1.7111, |
|
"step": 21173 |
|
}, |
|
{ |
|
"epoch": 0.5395522815320903, |
|
"grad_norm": 1.4828646183013916, |
|
"learning_rate": 1.1686502276306382e-05, |
|
"loss": 1.7113, |
|
"step": 21204 |
|
}, |
|
{ |
|
"epoch": 0.5403411006571373, |
|
"grad_norm": 1.458970308303833, |
|
"learning_rate": 1.1615494400094445e-05, |
|
"loss": 1.7199, |
|
"step": 21235 |
|
}, |
|
{ |
|
"epoch": 0.5411299197821842, |
|
"grad_norm": 1.4522119760513306, |
|
"learning_rate": 1.1544637566209029e-05, |
|
"loss": 1.7052, |
|
"step": 21266 |
|
}, |
|
{ |
|
"epoch": 0.5419187389072311, |
|
"grad_norm": 1.4456357955932617, |
|
"learning_rate": 1.1473932574259886e-05, |
|
"loss": 1.7201, |
|
"step": 21297 |
|
}, |
|
{ |
|
"epoch": 0.542707558032278, |
|
"grad_norm": 1.4089595079421997, |
|
"learning_rate": 1.1403380222143247e-05, |
|
"loss": 1.705, |
|
"step": 21328 |
|
}, |
|
{ |
|
"epoch": 0.5434963771573249, |
|
"grad_norm": 1.4137688875198364, |
|
"learning_rate": 1.1332981306032808e-05, |
|
"loss": 1.7135, |
|
"step": 21359 |
|
}, |
|
{ |
|
"epoch": 0.5442851962823718, |
|
"grad_norm": 1.4155645370483398, |
|
"learning_rate": 1.1262736620370762e-05, |
|
"loss": 1.7159, |
|
"step": 21390 |
|
}, |
|
{ |
|
"epoch": 0.5450740154074187, |
|
"grad_norm": 1.5558688640594482, |
|
"learning_rate": 1.1192646957858854e-05, |
|
"loss": 1.7283, |
|
"step": 21421 |
|
}, |
|
{ |
|
"epoch": 0.5458628345324656, |
|
"grad_norm": 1.5027565956115723, |
|
"learning_rate": 1.1122713109449381e-05, |
|
"loss": 1.7135, |
|
"step": 21452 |
|
}, |
|
{ |
|
"epoch": 0.5466516536575126, |
|
"grad_norm": 1.499029517173767, |
|
"learning_rate": 1.105293586433634e-05, |
|
"loss": 1.7208, |
|
"step": 21483 |
|
}, |
|
{ |
|
"epoch": 0.5474404727825595, |
|
"grad_norm": 1.4107885360717773, |
|
"learning_rate": 1.0983316009946446e-05, |
|
"loss": 1.7199, |
|
"step": 21514 |
|
}, |
|
{ |
|
"epoch": 0.5482292919076064, |
|
"grad_norm": 1.3750280141830444, |
|
"learning_rate": 1.0913854331930282e-05, |
|
"loss": 1.6973, |
|
"step": 21545 |
|
}, |
|
{ |
|
"epoch": 0.5490181110326533, |
|
"grad_norm": 1.4192049503326416, |
|
"learning_rate": 1.0844551614153456e-05, |
|
"loss": 1.7034, |
|
"step": 21576 |
|
}, |
|
{ |
|
"epoch": 0.5498069301577002, |
|
"grad_norm": 1.422545075416565, |
|
"learning_rate": 1.0775408638687725e-05, |
|
"loss": 1.7168, |
|
"step": 21607 |
|
}, |
|
{ |
|
"epoch": 0.5505957492827471, |
|
"grad_norm": 1.4749089479446411, |
|
"learning_rate": 1.0706426185802165e-05, |
|
"loss": 1.7169, |
|
"step": 21638 |
|
}, |
|
{ |
|
"epoch": 0.551384568407794, |
|
"grad_norm": 1.4102238416671753, |
|
"learning_rate": 1.0637605033954371e-05, |
|
"loss": 1.7195, |
|
"step": 21669 |
|
}, |
|
{ |
|
"epoch": 0.552173387532841, |
|
"grad_norm": 1.4288344383239746, |
|
"learning_rate": 1.05689459597817e-05, |
|
"loss": 1.704, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.5529622066578879, |
|
"grad_norm": 1.4801214933395386, |
|
"learning_rate": 1.050044973809246e-05, |
|
"loss": 1.7011, |
|
"step": 21731 |
|
}, |
|
{ |
|
"epoch": 0.5537510257829348, |
|
"grad_norm": 1.4600056409835815, |
|
"learning_rate": 1.043211714185722e-05, |
|
"loss": 1.714, |
|
"step": 21762 |
|
}, |
|
{ |
|
"epoch": 0.5545398449079817, |
|
"grad_norm": 1.405286192893982, |
|
"learning_rate": 1.036394894220003e-05, |
|
"loss": 1.7098, |
|
"step": 21793 |
|
}, |
|
{ |
|
"epoch": 0.5553286640330286, |
|
"grad_norm": 1.4454749822616577, |
|
"learning_rate": 1.0295945908389751e-05, |
|
"loss": 1.7152, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 0.5561174831580755, |
|
"grad_norm": 1.5381968021392822, |
|
"learning_rate": 1.0228108807831393e-05, |
|
"loss": 1.7199, |
|
"step": 21855 |
|
}, |
|
{ |
|
"epoch": 0.5569063022831224, |
|
"grad_norm": 1.426140546798706, |
|
"learning_rate": 1.01604384060574e-05, |
|
"loss": 1.7317, |
|
"step": 21886 |
|
}, |
|
{ |
|
"epoch": 0.5576951214081693, |
|
"grad_norm": 1.6093019247055054, |
|
"learning_rate": 1.009293546671907e-05, |
|
"loss": 1.6993, |
|
"step": 21917 |
|
}, |
|
{ |
|
"epoch": 0.5584839405332163, |
|
"grad_norm": 1.344679355621338, |
|
"learning_rate": 1.002560075157791e-05, |
|
"loss": 1.7258, |
|
"step": 21948 |
|
}, |
|
{ |
|
"epoch": 0.5592727596582632, |
|
"grad_norm": 1.3664970397949219, |
|
"learning_rate": 9.958435020496995e-06, |
|
"loss": 1.71, |
|
"step": 21979 |
|
}, |
|
{ |
|
"epoch": 0.5600615787833101, |
|
"grad_norm": 1.457160234451294, |
|
"learning_rate": 9.89143903143249e-06, |
|
"loss": 1.7173, |
|
"step": 22010 |
|
}, |
|
{ |
|
"epoch": 0.560850397908357, |
|
"grad_norm": 1.3795216083526611, |
|
"learning_rate": 9.824613540425038e-06, |
|
"loss": 1.6924, |
|
"step": 22041 |
|
}, |
|
{ |
|
"epoch": 0.5616392170334039, |
|
"grad_norm": 1.3805229663848877, |
|
"learning_rate": 9.757959301591197e-06, |
|
"loss": 1.7149, |
|
"step": 22072 |
|
}, |
|
{ |
|
"epoch": 0.5624280361584508, |
|
"grad_norm": 1.4958610534667969, |
|
"learning_rate": 9.691477067115017e-06, |
|
"loss": 1.7161, |
|
"step": 22103 |
|
}, |
|
{ |
|
"epoch": 0.5632168552834977, |
|
"grad_norm": 1.4804275035858154, |
|
"learning_rate": 9.625167587239467e-06, |
|
"loss": 1.7023, |
|
"step": 22134 |
|
}, |
|
{ |
|
"epoch": 0.5640056744085447, |
|
"grad_norm": 1.3880009651184082, |
|
"learning_rate": 9.559031610258007e-06, |
|
"loss": 1.7312, |
|
"step": 22165 |
|
}, |
|
{ |
|
"epoch": 0.5647944935335916, |
|
"grad_norm": 1.429051160812378, |
|
"learning_rate": 9.493069882506164e-06, |
|
"loss": 1.7102, |
|
"step": 22196 |
|
}, |
|
{ |
|
"epoch": 0.5655833126586385, |
|
"grad_norm": 1.4571672677993774, |
|
"learning_rate": 9.427283148353056e-06, |
|
"loss": 1.7234, |
|
"step": 22227 |
|
}, |
|
{ |
|
"epoch": 0.5663721317836854, |
|
"grad_norm": 1.4143497943878174, |
|
"learning_rate": 9.361672150193052e-06, |
|
"loss": 1.7129, |
|
"step": 22258 |
|
}, |
|
{ |
|
"epoch": 0.5671609509087323, |
|
"grad_norm": 1.4296061992645264, |
|
"learning_rate": 9.29623762843734e-06, |
|
"loss": 1.7291, |
|
"step": 22289 |
|
}, |
|
{ |
|
"epoch": 0.5679497700337792, |
|
"grad_norm": 1.4027940034866333, |
|
"learning_rate": 9.230980321505594e-06, |
|
"loss": 1.7199, |
|
"step": 22320 |
|
}, |
|
{ |
|
"epoch": 0.5687385891588261, |
|
"grad_norm": 1.4574463367462158, |
|
"learning_rate": 9.165900965817668e-06, |
|
"loss": 1.713, |
|
"step": 22351 |
|
}, |
|
{ |
|
"epoch": 0.569527408283873, |
|
"grad_norm": 1.4593865871429443, |
|
"learning_rate": 9.101000295785245e-06, |
|
"loss": 1.7153, |
|
"step": 22382 |
|
}, |
|
{ |
|
"epoch": 0.57031622740892, |
|
"grad_norm": 1.4154292345046997, |
|
"learning_rate": 9.036279043803565e-06, |
|
"loss": 1.7046, |
|
"step": 22413 |
|
}, |
|
{ |
|
"epoch": 0.571105046533967, |
|
"grad_norm": 1.4303706884384155, |
|
"learning_rate": 8.971737940243147e-06, |
|
"loss": 1.6939, |
|
"step": 22444 |
|
}, |
|
{ |
|
"epoch": 0.5718938656590139, |
|
"grad_norm": 1.4045100212097168, |
|
"learning_rate": 8.907377713441592e-06, |
|
"loss": 1.7022, |
|
"step": 22475 |
|
}, |
|
{ |
|
"epoch": 0.5726826847840608, |
|
"grad_norm": 1.4179104566574097, |
|
"learning_rate": 8.843199089695293e-06, |
|
"loss": 1.6987, |
|
"step": 22506 |
|
}, |
|
{ |
|
"epoch": 0.5734715039091077, |
|
"grad_norm": 1.4246447086334229, |
|
"learning_rate": 8.779202793251311e-06, |
|
"loss": 1.7187, |
|
"step": 22537 |
|
}, |
|
{ |
|
"epoch": 0.5742603230341546, |
|
"grad_norm": 1.3932377099990845, |
|
"learning_rate": 8.715389546299149e-06, |
|
"loss": 1.7074, |
|
"step": 22568 |
|
}, |
|
{ |
|
"epoch": 0.5750491421592016, |
|
"grad_norm": 1.3958845138549805, |
|
"learning_rate": 8.651760068962617e-06, |
|
"loss": 1.6959, |
|
"step": 22599 |
|
}, |
|
{ |
|
"epoch": 0.5758379612842485, |
|
"grad_norm": 1.4608432054519653, |
|
"learning_rate": 8.588315079291733e-06, |
|
"loss": 1.6956, |
|
"step": 22630 |
|
}, |
|
{ |
|
"epoch": 0.5766267804092954, |
|
"grad_norm": 1.4236079454421997, |
|
"learning_rate": 8.52505529325457e-06, |
|
"loss": 1.6964, |
|
"step": 22661 |
|
}, |
|
{ |
|
"epoch": 0.5774155995343423, |
|
"grad_norm": 1.4737471342086792, |
|
"learning_rate": 8.461981424729216e-06, |
|
"loss": 1.7316, |
|
"step": 22692 |
|
}, |
|
{ |
|
"epoch": 0.5782044186593892, |
|
"grad_norm": 1.4301661252975464, |
|
"learning_rate": 8.399094185495725e-06, |
|
"loss": 1.6969, |
|
"step": 22723 |
|
}, |
|
{ |
|
"epoch": 0.5789932377844361, |
|
"grad_norm": 1.5357433557510376, |
|
"learning_rate": 8.336394285228017e-06, |
|
"loss": 1.7211, |
|
"step": 22754 |
|
}, |
|
{ |
|
"epoch": 0.579782056909483, |
|
"grad_norm": 1.4420846700668335, |
|
"learning_rate": 8.273882431485952e-06, |
|
"loss": 1.7177, |
|
"step": 22785 |
|
}, |
|
{ |
|
"epoch": 0.58057087603453, |
|
"grad_norm": 1.402849793434143, |
|
"learning_rate": 8.211559329707316e-06, |
|
"loss": 1.7008, |
|
"step": 22816 |
|
}, |
|
{ |
|
"epoch": 0.5813596951595769, |
|
"grad_norm": 1.4084275960922241, |
|
"learning_rate": 8.149425683199823e-06, |
|
"loss": 1.7052, |
|
"step": 22847 |
|
}, |
|
{ |
|
"epoch": 0.5821485142846238, |
|
"grad_norm": 1.406717300415039, |
|
"learning_rate": 8.08748219313325e-06, |
|
"loss": 1.7201, |
|
"step": 22878 |
|
}, |
|
{ |
|
"epoch": 0.5829373334096707, |
|
"grad_norm": 1.9726225137710571, |
|
"learning_rate": 8.025729558531453e-06, |
|
"loss": 1.7197, |
|
"step": 22909 |
|
}, |
|
{ |
|
"epoch": 0.5837261525347176, |
|
"grad_norm": 1.4474542140960693, |
|
"learning_rate": 7.964168476264508e-06, |
|
"loss": 1.7314, |
|
"step": 22940 |
|
}, |
|
{ |
|
"epoch": 0.5845149716597645, |
|
"grad_norm": 1.5046030282974243, |
|
"learning_rate": 7.902799641040884e-06, |
|
"loss": 1.7128, |
|
"step": 22971 |
|
}, |
|
{ |
|
"epoch": 0.5853037907848114, |
|
"grad_norm": 1.4233800172805786, |
|
"learning_rate": 7.841623745399523e-06, |
|
"loss": 1.7026, |
|
"step": 23002 |
|
}, |
|
{ |
|
"epoch": 0.5860926099098583, |
|
"grad_norm": 1.4411020278930664, |
|
"learning_rate": 7.780641479702114e-06, |
|
"loss": 1.7039, |
|
"step": 23033 |
|
}, |
|
{ |
|
"epoch": 0.5868814290349053, |
|
"grad_norm": 1.3648072481155396, |
|
"learning_rate": 7.719853532125227e-06, |
|
"loss": 1.6997, |
|
"step": 23064 |
|
}, |
|
{ |
|
"epoch": 0.5876702481599522, |
|
"grad_norm": 1.3941482305526733, |
|
"learning_rate": 7.65926058865258e-06, |
|
"loss": 1.6947, |
|
"step": 23095 |
|
}, |
|
{ |
|
"epoch": 0.5884590672849991, |
|
"grad_norm": 1.4287201166152954, |
|
"learning_rate": 7.598863333067313e-06, |
|
"loss": 1.7081, |
|
"step": 23126 |
|
}, |
|
{ |
|
"epoch": 0.589247886410046, |
|
"grad_norm": 1.4891555309295654, |
|
"learning_rate": 7.538662446944253e-06, |
|
"loss": 1.6999, |
|
"step": 23157 |
|
}, |
|
{ |
|
"epoch": 0.5900367055350929, |
|
"grad_norm": 1.4390950202941895, |
|
"learning_rate": 7.478658609642211e-06, |
|
"loss": 1.71, |
|
"step": 23188 |
|
}, |
|
{ |
|
"epoch": 0.5908255246601398, |
|
"grad_norm": 1.4771630764007568, |
|
"learning_rate": 7.418852498296327e-06, |
|
"loss": 1.6975, |
|
"step": 23219 |
|
}, |
|
{ |
|
"epoch": 0.5916143437851867, |
|
"grad_norm": 1.4118000268936157, |
|
"learning_rate": 7.359244787810457e-06, |
|
"loss": 1.7028, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.5924031629102336, |
|
"grad_norm": 1.4303267002105713, |
|
"learning_rate": 7.299836150849493e-06, |
|
"loss": 1.7052, |
|
"step": 23281 |
|
}, |
|
{ |
|
"epoch": 0.5931919820352806, |
|
"grad_norm": 1.3951334953308105, |
|
"learning_rate": 7.240627257831847e-06, |
|
"loss": 1.711, |
|
"step": 23312 |
|
}, |
|
{ |
|
"epoch": 0.5939808011603275, |
|
"grad_norm": 1.4434106349945068, |
|
"learning_rate": 7.1816187769218195e-06, |
|
"loss": 1.71, |
|
"step": 23343 |
|
}, |
|
{ |
|
"epoch": 0.5947696202853744, |
|
"grad_norm": 1.4348808526992798, |
|
"learning_rate": 7.1228113740220895e-06, |
|
"loss": 1.7104, |
|
"step": 23374 |
|
}, |
|
{ |
|
"epoch": 0.5955584394104213, |
|
"grad_norm": 1.4280933141708374, |
|
"learning_rate": 7.064205712766226e-06, |
|
"loss": 1.6948, |
|
"step": 23405 |
|
}, |
|
{ |
|
"epoch": 0.5963472585354682, |
|
"grad_norm": 1.4204617738723755, |
|
"learning_rate": 7.005802454511129e-06, |
|
"loss": 1.7016, |
|
"step": 23436 |
|
}, |
|
{ |
|
"epoch": 0.5971360776605151, |
|
"grad_norm": 1.3821487426757812, |
|
"learning_rate": 6.947602258329639e-06, |
|
"loss": 1.6919, |
|
"step": 23467 |
|
}, |
|
{ |
|
"epoch": 0.597924896785562, |
|
"grad_norm": 1.4799888134002686, |
|
"learning_rate": 6.889605781003078e-06, |
|
"loss": 1.7245, |
|
"step": 23498 |
|
}, |
|
{ |
|
"epoch": 0.598713715910609, |
|
"grad_norm": 1.4447741508483887, |
|
"learning_rate": 6.831813677013776e-06, |
|
"loss": 1.7352, |
|
"step": 23529 |
|
}, |
|
{ |
|
"epoch": 0.5995025350356559, |
|
"grad_norm": 1.5367285013198853, |
|
"learning_rate": 6.774226598537792e-06, |
|
"loss": 1.7047, |
|
"step": 23560 |
|
}, |
|
{ |
|
"epoch": 0.6002913541607028, |
|
"grad_norm": 1.4005663394927979, |
|
"learning_rate": 6.716845195437482e-06, |
|
"loss": 1.7021, |
|
"step": 23591 |
|
}, |
|
{ |
|
"epoch": 0.6010801732857497, |
|
"grad_norm": 1.4289170503616333, |
|
"learning_rate": 6.659670115254168e-06, |
|
"loss": 1.7093, |
|
"step": 23622 |
|
}, |
|
{ |
|
"epoch": 0.6018689924107966, |
|
"grad_norm": 1.5853567123413086, |
|
"learning_rate": 6.602702003200872e-06, |
|
"loss": 1.7075, |
|
"step": 23653 |
|
}, |
|
{ |
|
"epoch": 0.6026578115358435, |
|
"grad_norm": 1.580708622932434, |
|
"learning_rate": 6.545941502154992e-06, |
|
"loss": 1.7041, |
|
"step": 23684 |
|
}, |
|
{ |
|
"epoch": 0.6034466306608904, |
|
"grad_norm": 1.477163553237915, |
|
"learning_rate": 6.489389252651057e-06, |
|
"loss": 1.7145, |
|
"step": 23715 |
|
}, |
|
{ |
|
"epoch": 0.6042354497859374, |
|
"grad_norm": 1.428688883781433, |
|
"learning_rate": 6.4330458928735325e-06, |
|
"loss": 1.6906, |
|
"step": 23746 |
|
}, |
|
{ |
|
"epoch": 0.6050242689109843, |
|
"grad_norm": 1.4114421606063843, |
|
"learning_rate": 6.376912058649559e-06, |
|
"loss": 1.7116, |
|
"step": 23777 |
|
}, |
|
{ |
|
"epoch": 0.6058130880360312, |
|
"grad_norm": 1.472838044166565, |
|
"learning_rate": 6.320988383441845e-06, |
|
"loss": 1.6997, |
|
"step": 23808 |
|
}, |
|
{ |
|
"epoch": 0.6066019071610781, |
|
"grad_norm": 1.4391415119171143, |
|
"learning_rate": 6.265275498341452e-06, |
|
"loss": 1.7061, |
|
"step": 23839 |
|
}, |
|
{ |
|
"epoch": 0.607390726286125, |
|
"grad_norm": 1.4201316833496094, |
|
"learning_rate": 6.209774032060714e-06, |
|
"loss": 1.6964, |
|
"step": 23870 |
|
}, |
|
{ |
|
"epoch": 0.6081795454111719, |
|
"grad_norm": 1.3875731229782104, |
|
"learning_rate": 6.1544846109261365e-06, |
|
"loss": 1.7004, |
|
"step": 23901 |
|
}, |
|
{ |
|
"epoch": 0.6089683645362188, |
|
"grad_norm": 1.4405038356781006, |
|
"learning_rate": 6.099407858871342e-06, |
|
"loss": 1.7059, |
|
"step": 23932 |
|
}, |
|
{ |
|
"epoch": 0.6097571836612657, |
|
"grad_norm": 1.4719831943511963, |
|
"learning_rate": 6.044544397429958e-06, |
|
"loss": 1.7063, |
|
"step": 23963 |
|
}, |
|
{ |
|
"epoch": 0.6105460027863128, |
|
"grad_norm": 1.4867260456085205, |
|
"learning_rate": 5.989894845728708e-06, |
|
"loss": 1.7054, |
|
"step": 23994 |
|
}, |
|
{ |
|
"epoch": 0.6113348219113597, |
|
"grad_norm": 1.4767833948135376, |
|
"learning_rate": 5.9354598204803605e-06, |
|
"loss": 1.6951, |
|
"step": 24025 |
|
}, |
|
{ |
|
"epoch": 0.6121236410364066, |
|
"grad_norm": 1.4202378988265991, |
|
"learning_rate": 5.881239935976762e-06, |
|
"loss": 1.7042, |
|
"step": 24056 |
|
}, |
|
{ |
|
"epoch": 0.6129124601614535, |
|
"grad_norm": 1.4658666849136353, |
|
"learning_rate": 5.827235804081954e-06, |
|
"loss": 1.7011, |
|
"step": 24087 |
|
}, |
|
{ |
|
"epoch": 0.6137012792865004, |
|
"grad_norm": 1.437771201133728, |
|
"learning_rate": 5.773448034225221e-06, |
|
"loss": 1.7033, |
|
"step": 24118 |
|
}, |
|
{ |
|
"epoch": 0.6144900984115473, |
|
"grad_norm": 1.4407992362976074, |
|
"learning_rate": 5.719877233394228e-06, |
|
"loss": 1.6841, |
|
"step": 24149 |
|
}, |
|
{ |
|
"epoch": 0.6152789175365942, |
|
"grad_norm": 1.434173822402954, |
|
"learning_rate": 5.666524006128191e-06, |
|
"loss": 1.6893, |
|
"step": 24180 |
|
}, |
|
{ |
|
"epoch": 0.6160677366616412, |
|
"grad_norm": 1.5241893529891968, |
|
"learning_rate": 5.613388954511015e-06, |
|
"loss": 1.707, |
|
"step": 24211 |
|
}, |
|
{ |
|
"epoch": 0.6168565557866881, |
|
"grad_norm": 1.4565976858139038, |
|
"learning_rate": 5.560472678164552e-06, |
|
"loss": 1.695, |
|
"step": 24242 |
|
}, |
|
{ |
|
"epoch": 0.617645374911735, |
|
"grad_norm": 1.458123803138733, |
|
"learning_rate": 5.507775774241775e-06, |
|
"loss": 1.6988, |
|
"step": 24273 |
|
}, |
|
{ |
|
"epoch": 0.6184341940367819, |
|
"grad_norm": 1.4085556268692017, |
|
"learning_rate": 5.4552988374200945e-06, |
|
"loss": 1.6986, |
|
"step": 24304 |
|
}, |
|
{ |
|
"epoch": 0.6192230131618288, |
|
"grad_norm": 1.444799542427063, |
|
"learning_rate": 5.403042459894597e-06, |
|
"loss": 1.7036, |
|
"step": 24335 |
|
}, |
|
{ |
|
"epoch": 0.6200118322868757, |
|
"grad_norm": 1.417597770690918, |
|
"learning_rate": 5.3510072313714135e-06, |
|
"loss": 1.7016, |
|
"step": 24366 |
|
}, |
|
{ |
|
"epoch": 0.6208006514119226, |
|
"grad_norm": 1.4727599620819092, |
|
"learning_rate": 5.2991937390610205e-06, |
|
"loss": 1.7198, |
|
"step": 24397 |
|
}, |
|
{ |
|
"epoch": 0.6215894705369696, |
|
"grad_norm": 1.407718300819397, |
|
"learning_rate": 5.247602567671625e-06, |
|
"loss": 1.6932, |
|
"step": 24428 |
|
}, |
|
{ |
|
"epoch": 0.6223782896620165, |
|
"grad_norm": 1.424126148223877, |
|
"learning_rate": 5.196234299402603e-06, |
|
"loss": 1.6927, |
|
"step": 24459 |
|
}, |
|
{ |
|
"epoch": 0.6231671087870634, |
|
"grad_norm": 1.5233465433120728, |
|
"learning_rate": 5.145089513937865e-06, |
|
"loss": 1.7072, |
|
"step": 24490 |
|
}, |
|
{ |
|
"epoch": 0.6239559279121103, |
|
"grad_norm": 1.427517056465149, |
|
"learning_rate": 5.094168788439369e-06, |
|
"loss": 1.6981, |
|
"step": 24521 |
|
}, |
|
{ |
|
"epoch": 0.6247447470371572, |
|
"grad_norm": 1.4485392570495605, |
|
"learning_rate": 5.043472697540594e-06, |
|
"loss": 1.6855, |
|
"step": 24552 |
|
}, |
|
{ |
|
"epoch": 0.6255335661622041, |
|
"grad_norm": 1.4278972148895264, |
|
"learning_rate": 4.993001813340012e-06, |
|
"loss": 1.6945, |
|
"step": 24583 |
|
}, |
|
{ |
|
"epoch": 0.626322385287251, |
|
"grad_norm": 1.392105221748352, |
|
"learning_rate": 4.942756705394702e-06, |
|
"loss": 1.6946, |
|
"step": 24614 |
|
}, |
|
{ |
|
"epoch": 0.627111204412298, |
|
"grad_norm": 1.4224188327789307, |
|
"learning_rate": 4.892737940713884e-06, |
|
"loss": 1.7071, |
|
"step": 24645 |
|
}, |
|
{ |
|
"epoch": 0.6279000235373449, |
|
"grad_norm": 1.4652680158615112, |
|
"learning_rate": 4.842946083752511e-06, |
|
"loss": 1.6967, |
|
"step": 24676 |
|
}, |
|
{ |
|
"epoch": 0.6286888426623918, |
|
"grad_norm": 1.490435004234314, |
|
"learning_rate": 4.79338169640493e-06, |
|
"loss": 1.6873, |
|
"step": 24707 |
|
}, |
|
{ |
|
"epoch": 0.6294776617874387, |
|
"grad_norm": 1.54020357131958, |
|
"learning_rate": 4.74404533799851e-06, |
|
"loss": 1.7026, |
|
"step": 24738 |
|
}, |
|
{ |
|
"epoch": 0.6302664809124856, |
|
"grad_norm": 1.3947267532348633, |
|
"learning_rate": 4.694937565287344e-06, |
|
"loss": 1.6959, |
|
"step": 24769 |
|
}, |
|
{ |
|
"epoch": 0.6310553000375325, |
|
"grad_norm": 1.4161572456359863, |
|
"learning_rate": 4.646058932445985e-06, |
|
"loss": 1.6909, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.6318441191625794, |
|
"grad_norm": 1.4541959762573242, |
|
"learning_rate": 4.597409991063148e-06, |
|
"loss": 1.6961, |
|
"step": 24831 |
|
}, |
|
{ |
|
"epoch": 0.6326329382876263, |
|
"grad_norm": 1.410683035850525, |
|
"learning_rate": 4.5489912901355375e-06, |
|
"loss": 1.6846, |
|
"step": 24862 |
|
}, |
|
{ |
|
"epoch": 0.6334217574126733, |
|
"grad_norm": 1.4031442403793335, |
|
"learning_rate": 4.500803376061608e-06, |
|
"loss": 1.6855, |
|
"step": 24893 |
|
}, |
|
{ |
|
"epoch": 0.6342105765377202, |
|
"grad_norm": 1.3770359754562378, |
|
"learning_rate": 4.45284679263541e-06, |
|
"loss": 1.6989, |
|
"step": 24924 |
|
}, |
|
{ |
|
"epoch": 0.6349993956627671, |
|
"grad_norm": 1.4767192602157593, |
|
"learning_rate": 4.4051220810404775e-06, |
|
"loss": 1.6911, |
|
"step": 24955 |
|
}, |
|
{ |
|
"epoch": 0.635788214787814, |
|
"grad_norm": 1.4399274587631226, |
|
"learning_rate": 4.3576297798437025e-06, |
|
"loss": 1.7003, |
|
"step": 24986 |
|
}, |
|
{ |
|
"epoch": 0.6365770339128609, |
|
"grad_norm": 1.3938783407211304, |
|
"learning_rate": 4.3103704249892436e-06, |
|
"loss": 1.7098, |
|
"step": 25017 |
|
}, |
|
{ |
|
"epoch": 0.6373658530379078, |
|
"grad_norm": 1.4374542236328125, |
|
"learning_rate": 4.263344549792487e-06, |
|
"loss": 1.6949, |
|
"step": 25048 |
|
}, |
|
{ |
|
"epoch": 0.6381546721629547, |
|
"grad_norm": 1.443415641784668, |
|
"learning_rate": 4.216552684934056e-06, |
|
"loss": 1.7002, |
|
"step": 25079 |
|
}, |
|
{ |
|
"epoch": 0.6389434912880017, |
|
"grad_norm": 1.455540418624878, |
|
"learning_rate": 4.169995358453777e-06, |
|
"loss": 1.7018, |
|
"step": 25110 |
|
}, |
|
{ |
|
"epoch": 0.6397323104130486, |
|
"grad_norm": 1.4947654008865356, |
|
"learning_rate": 4.123673095744757e-06, |
|
"loss": 1.681, |
|
"step": 25141 |
|
}, |
|
{ |
|
"epoch": 0.6405211295380955, |
|
"grad_norm": 1.4933280944824219, |
|
"learning_rate": 4.077586419547435e-06, |
|
"loss": 1.703, |
|
"step": 25172 |
|
}, |
|
{ |
|
"epoch": 0.6413099486631424, |
|
"grad_norm": 1.4724138975143433, |
|
"learning_rate": 4.03173584994368e-06, |
|
"loss": 1.6987, |
|
"step": 25203 |
|
}, |
|
{ |
|
"epoch": 0.6420987677881893, |
|
"grad_norm": 1.370006799697876, |
|
"learning_rate": 3.986121904350948e-06, |
|
"loss": 1.6881, |
|
"step": 25234 |
|
}, |
|
{ |
|
"epoch": 0.6428875869132362, |
|
"grad_norm": 1.5258022546768188, |
|
"learning_rate": 3.940745097516407e-06, |
|
"loss": 1.6856, |
|
"step": 25265 |
|
}, |
|
{ |
|
"epoch": 0.6436764060382831, |
|
"grad_norm": 1.3982164859771729, |
|
"learning_rate": 3.89560594151116e-06, |
|
"loss": 1.6956, |
|
"step": 25296 |
|
}, |
|
{ |
|
"epoch": 0.64446522516333, |
|
"grad_norm": 1.457051396369934, |
|
"learning_rate": 3.850704945724456e-06, |
|
"loss": 1.7038, |
|
"step": 25327 |
|
}, |
|
{ |
|
"epoch": 0.645254044288377, |
|
"grad_norm": 1.4047811031341553, |
|
"learning_rate": 3.8060426168579077e-06, |
|
"loss": 1.6984, |
|
"step": 25358 |
|
}, |
|
{ |
|
"epoch": 0.6460428634134239, |
|
"grad_norm": 1.3755521774291992, |
|
"learning_rate": 3.7616194589198407e-06, |
|
"loss": 1.7016, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 0.6468316825384708, |
|
"grad_norm": 1.4575284719467163, |
|
"learning_rate": 3.7174359732195574e-06, |
|
"loss": 1.6907, |
|
"step": 25420 |
|
}, |
|
{ |
|
"epoch": 0.6476205016635177, |
|
"grad_norm": 1.563887357711792, |
|
"learning_rate": 3.673492658361677e-06, |
|
"loss": 1.7141, |
|
"step": 25451 |
|
}, |
|
{ |
|
"epoch": 0.6484093207885646, |
|
"grad_norm": 1.4307068586349487, |
|
"learning_rate": 3.6297900102405467e-06, |
|
"loss": 1.7018, |
|
"step": 25482 |
|
}, |
|
{ |
|
"epoch": 0.6491981399136116, |
|
"grad_norm": 1.4639099836349487, |
|
"learning_rate": 3.586328522034607e-06, |
|
"loss": 1.7162, |
|
"step": 25513 |
|
}, |
|
{ |
|
"epoch": 0.6499869590386586, |
|
"grad_norm": 1.4759560823440552, |
|
"learning_rate": 3.543108684200838e-06, |
|
"loss": 1.6893, |
|
"step": 25544 |
|
}, |
|
{ |
|
"epoch": 0.6507757781637055, |
|
"grad_norm": 1.4981391429901123, |
|
"learning_rate": 3.5001309844692464e-06, |
|
"loss": 1.7037, |
|
"step": 25575 |
|
}, |
|
{ |
|
"epoch": 0.6515645972887524, |
|
"grad_norm": 1.4637056589126587, |
|
"learning_rate": 3.4573959078373215e-06, |
|
"loss": 1.683, |
|
"step": 25606 |
|
}, |
|
{ |
|
"epoch": 0.6523534164137993, |
|
"grad_norm": 1.5560393333435059, |
|
"learning_rate": 3.4149039365646063e-06, |
|
"loss": 1.6843, |
|
"step": 25637 |
|
}, |
|
{ |
|
"epoch": 0.6531422355388462, |
|
"grad_norm": 1.4658019542694092, |
|
"learning_rate": 3.3726555501672143e-06, |
|
"loss": 1.6883, |
|
"step": 25668 |
|
}, |
|
{ |
|
"epoch": 0.6539310546638931, |
|
"grad_norm": 1.397363543510437, |
|
"learning_rate": 3.33065122541244e-06, |
|
"loss": 1.7005, |
|
"step": 25699 |
|
}, |
|
{ |
|
"epoch": 0.65471987378894, |
|
"grad_norm": 1.439571738243103, |
|
"learning_rate": 3.288891436313385e-06, |
|
"loss": 1.7144, |
|
"step": 25730 |
|
}, |
|
{ |
|
"epoch": 0.655508692913987, |
|
"grad_norm": 1.4690093994140625, |
|
"learning_rate": 3.2473766541235963e-06, |
|
"loss": 1.6918, |
|
"step": 25761 |
|
}, |
|
{ |
|
"epoch": 0.6562975120390339, |
|
"grad_norm": 1.4217287302017212, |
|
"learning_rate": 3.2061073473317466e-06, |
|
"loss": 1.6902, |
|
"step": 25792 |
|
}, |
|
{ |
|
"epoch": 0.6570863311640808, |
|
"grad_norm": 1.3401572704315186, |
|
"learning_rate": 3.1650839816563444e-06, |
|
"loss": 1.6949, |
|
"step": 25823 |
|
}, |
|
{ |
|
"epoch": 0.6578751502891277, |
|
"grad_norm": 1.4836351871490479, |
|
"learning_rate": 3.1243070200405093e-06, |
|
"loss": 1.6989, |
|
"step": 25854 |
|
}, |
|
{ |
|
"epoch": 0.6586639694141746, |
|
"grad_norm": 1.3935353755950928, |
|
"learning_rate": 3.0837769226467e-06, |
|
"loss": 1.6926, |
|
"step": 25885 |
|
}, |
|
{ |
|
"epoch": 0.6594527885392215, |
|
"grad_norm": 1.4989404678344727, |
|
"learning_rate": 3.0434941468515666e-06, |
|
"loss": 1.6857, |
|
"step": 25916 |
|
}, |
|
{ |
|
"epoch": 0.6602416076642684, |
|
"grad_norm": 1.4674372673034668, |
|
"learning_rate": 3.003459147240753e-06, |
|
"loss": 1.6912, |
|
"step": 25947 |
|
}, |
|
{ |
|
"epoch": 0.6610304267893153, |
|
"grad_norm": 1.5865478515625, |
|
"learning_rate": 2.9636723756037875e-06, |
|
"loss": 1.6904, |
|
"step": 25978 |
|
}, |
|
{ |
|
"epoch": 0.6618192459143623, |
|
"grad_norm": 1.440338134765625, |
|
"learning_rate": 2.9241342809289833e-06, |
|
"loss": 1.6981, |
|
"step": 26009 |
|
}, |
|
{ |
|
"epoch": 0.6626080650394092, |
|
"grad_norm": 1.3906199932098389, |
|
"learning_rate": 2.8848453093983594e-06, |
|
"loss": 1.6854, |
|
"step": 26040 |
|
}, |
|
{ |
|
"epoch": 0.6633968841644561, |
|
"grad_norm": 1.475035309791565, |
|
"learning_rate": 2.8458059043826257e-06, |
|
"loss": 1.704, |
|
"step": 26071 |
|
}, |
|
{ |
|
"epoch": 0.664185703289503, |
|
"grad_norm": 1.4185906648635864, |
|
"learning_rate": 2.807016506436172e-06, |
|
"loss": 1.6873, |
|
"step": 26102 |
|
}, |
|
{ |
|
"epoch": 0.6649745224145499, |
|
"grad_norm": 1.5231366157531738, |
|
"learning_rate": 2.7684775532920566e-06, |
|
"loss": 1.7009, |
|
"step": 26133 |
|
}, |
|
{ |
|
"epoch": 0.6657633415395968, |
|
"grad_norm": 1.427589774131775, |
|
"learning_rate": 2.7301894798571425e-06, |
|
"loss": 1.7065, |
|
"step": 26164 |
|
}, |
|
{ |
|
"epoch": 0.6665521606646437, |
|
"grad_norm": 1.4298368692398071, |
|
"learning_rate": 2.6921527182071386e-06, |
|
"loss": 1.6944, |
|
"step": 26195 |
|
}, |
|
{ |
|
"epoch": 0.6673409797896906, |
|
"grad_norm": 1.498779058456421, |
|
"learning_rate": 2.654367697581725e-06, |
|
"loss": 1.6845, |
|
"step": 26226 |
|
}, |
|
{ |
|
"epoch": 0.6681297989147376, |
|
"grad_norm": 1.4032225608825684, |
|
"learning_rate": 2.6168348443797175e-06, |
|
"loss": 1.6936, |
|
"step": 26257 |
|
}, |
|
{ |
|
"epoch": 0.6689186180397845, |
|
"grad_norm": 1.4571456909179688, |
|
"learning_rate": 2.5795545821542757e-06, |
|
"loss": 1.7055, |
|
"step": 26288 |
|
}, |
|
{ |
|
"epoch": 0.6697074371648314, |
|
"grad_norm": 1.4156779050827026, |
|
"learning_rate": 2.54252733160808e-06, |
|
"loss": 1.6861, |
|
"step": 26319 |
|
}, |
|
{ |
|
"epoch": 0.6704962562898783, |
|
"grad_norm": 1.5022954940795898, |
|
"learning_rate": 2.5057535105886294e-06, |
|
"loss": 1.6834, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.6712850754149252, |
|
"grad_norm": 1.4164525270462036, |
|
"learning_rate": 2.4692335340834953e-06, |
|
"loss": 1.699, |
|
"step": 26381 |
|
}, |
|
{ |
|
"epoch": 0.6720738945399721, |
|
"grad_norm": 1.459639072418213, |
|
"learning_rate": 2.432967814215639e-06, |
|
"loss": 1.6987, |
|
"step": 26412 |
|
}, |
|
{ |
|
"epoch": 0.672862713665019, |
|
"grad_norm": 1.4693511724472046, |
|
"learning_rate": 2.396956760238794e-06, |
|
"loss": 1.701, |
|
"step": 26443 |
|
}, |
|
{ |
|
"epoch": 0.673651532790066, |
|
"grad_norm": 1.3714548349380493, |
|
"learning_rate": 2.361200778532796e-06, |
|
"loss": 1.6754, |
|
"step": 26474 |
|
}, |
|
{ |
|
"epoch": 0.6744403519151129, |
|
"grad_norm": 1.4285922050476074, |
|
"learning_rate": 2.325700272599049e-06, |
|
"loss": 1.6907, |
|
"step": 26505 |
|
}, |
|
{ |
|
"epoch": 0.6752291710401598, |
|
"grad_norm": 1.5615297555923462, |
|
"learning_rate": 2.2904556430559415e-06, |
|
"loss": 1.6932, |
|
"step": 26536 |
|
}, |
|
{ |
|
"epoch": 0.6760179901652067, |
|
"grad_norm": 1.3956187963485718, |
|
"learning_rate": 2.2554672876343106e-06, |
|
"loss": 1.7064, |
|
"step": 26567 |
|
}, |
|
{ |
|
"epoch": 0.6768068092902536, |
|
"grad_norm": 1.4564794301986694, |
|
"learning_rate": 2.220735601173002e-06, |
|
"loss": 1.6922, |
|
"step": 26598 |
|
}, |
|
{ |
|
"epoch": 0.6775956284153005, |
|
"grad_norm": 1.4553749561309814, |
|
"learning_rate": 2.186260975614382e-06, |
|
"loss": 1.7055, |
|
"step": 26629 |
|
}, |
|
{ |
|
"epoch": 0.6783844475403474, |
|
"grad_norm": 1.4266986846923828, |
|
"learning_rate": 2.1520437999999034e-06, |
|
"loss": 1.7145, |
|
"step": 26660 |
|
}, |
|
{ |
|
"epoch": 0.6791732666653943, |
|
"grad_norm": 1.4530359506607056, |
|
"learning_rate": 2.1180844604657526e-06, |
|
"loss": 1.6916, |
|
"step": 26691 |
|
}, |
|
{ |
|
"epoch": 0.6799620857904413, |
|
"grad_norm": 1.4178498983383179, |
|
"learning_rate": 2.084383340238455e-06, |
|
"loss": 1.6766, |
|
"step": 26722 |
|
}, |
|
{ |
|
"epoch": 0.6807509049154882, |
|
"grad_norm": 1.393988847732544, |
|
"learning_rate": 2.0509408196305704e-06, |
|
"loss": 1.6873, |
|
"step": 26753 |
|
}, |
|
{ |
|
"epoch": 0.6815397240405351, |
|
"grad_norm": 1.3752752542495728, |
|
"learning_rate": 2.017757276036403e-06, |
|
"loss": 1.6984, |
|
"step": 26784 |
|
}, |
|
{ |
|
"epoch": 0.682328543165582, |
|
"grad_norm": 1.394559383392334, |
|
"learning_rate": 1.984833083927726e-06, |
|
"loss": 1.7032, |
|
"step": 26815 |
|
}, |
|
{ |
|
"epoch": 0.6831173622906289, |
|
"grad_norm": 1.4148964881896973, |
|
"learning_rate": 1.952168614849581e-06, |
|
"loss": 1.6844, |
|
"step": 26846 |
|
}, |
|
{ |
|
"epoch": 0.6839061814156758, |
|
"grad_norm": 1.4353492259979248, |
|
"learning_rate": 1.919764237416058e-06, |
|
"loss": 1.7102, |
|
"step": 26877 |
|
}, |
|
{ |
|
"epoch": 0.6846950005407227, |
|
"grad_norm": 1.3867477178573608, |
|
"learning_rate": 1.8876203173061463e-06, |
|
"loss": 1.6931, |
|
"step": 26908 |
|
}, |
|
{ |
|
"epoch": 0.6854838196657697, |
|
"grad_norm": 1.3678532838821411, |
|
"learning_rate": 1.8557372172596206e-06, |
|
"loss": 1.7009, |
|
"step": 26939 |
|
}, |
|
{ |
|
"epoch": 0.6862726387908166, |
|
"grad_norm": 1.485137939453125, |
|
"learning_rate": 1.8241152970729341e-06, |
|
"loss": 1.7016, |
|
"step": 26970 |
|
}, |
|
{ |
|
"epoch": 0.6870614579158635, |
|
"grad_norm": 1.4369994401931763, |
|
"learning_rate": 1.7927549135951572e-06, |
|
"loss": 1.6963, |
|
"step": 27001 |
|
}, |
|
{ |
|
"epoch": 0.6878502770409104, |
|
"grad_norm": 1.4508947134017944, |
|
"learning_rate": 1.7616564207239477e-06, |
|
"loss": 1.6831, |
|
"step": 27032 |
|
}, |
|
{ |
|
"epoch": 0.6886390961659574, |
|
"grad_norm": 1.3917666673660278, |
|
"learning_rate": 1.730820169401584e-06, |
|
"loss": 1.6708, |
|
"step": 27063 |
|
}, |
|
{ |
|
"epoch": 0.6894279152910043, |
|
"grad_norm": 1.4687188863754272, |
|
"learning_rate": 1.7002465076109558e-06, |
|
"loss": 1.6764, |
|
"step": 27094 |
|
}, |
|
{ |
|
"epoch": 0.6902167344160512, |
|
"grad_norm": 1.4053486585617065, |
|
"learning_rate": 1.6699357803716898e-06, |
|
"loss": 1.6931, |
|
"step": 27125 |
|
}, |
|
{ |
|
"epoch": 0.6910055535410982, |
|
"grad_norm": 1.4645016193389893, |
|
"learning_rate": 1.6398883297362305e-06, |
|
"loss": 1.7035, |
|
"step": 27156 |
|
}, |
|
{ |
|
"epoch": 0.6917943726661451, |
|
"grad_norm": 1.434180498123169, |
|
"learning_rate": 1.6101044947859606e-06, |
|
"loss": 1.6693, |
|
"step": 27187 |
|
}, |
|
{ |
|
"epoch": 0.692583191791192, |
|
"grad_norm": 1.451497197151184, |
|
"learning_rate": 1.5805846116274114e-06, |
|
"loss": 1.6776, |
|
"step": 27218 |
|
}, |
|
{ |
|
"epoch": 0.6933720109162389, |
|
"grad_norm": 1.4085174798965454, |
|
"learning_rate": 1.5513290133884611e-06, |
|
"loss": 1.684, |
|
"step": 27249 |
|
}, |
|
{ |
|
"epoch": 0.6941608300412858, |
|
"grad_norm": 1.3959869146347046, |
|
"learning_rate": 1.5223380302145512e-06, |
|
"loss": 1.6798, |
|
"step": 27280 |
|
}, |
|
{ |
|
"epoch": 0.6949496491663327, |
|
"grad_norm": 1.377614974975586, |
|
"learning_rate": 1.4936119892649925e-06, |
|
"loss": 1.6889, |
|
"step": 27311 |
|
}, |
|
{ |
|
"epoch": 0.6957384682913796, |
|
"grad_norm": 1.4654227495193481, |
|
"learning_rate": 1.4651512147092482e-06, |
|
"loss": 1.7027, |
|
"step": 27342 |
|
}, |
|
{ |
|
"epoch": 0.6965272874164266, |
|
"grad_norm": 1.336857795715332, |
|
"learning_rate": 1.4369560277232908e-06, |
|
"loss": 1.6756, |
|
"step": 27373 |
|
}, |
|
{ |
|
"epoch": 0.6973161065414735, |
|
"grad_norm": 1.4347259998321533, |
|
"learning_rate": 1.409026746485978e-06, |
|
"loss": 1.6831, |
|
"step": 27404 |
|
}, |
|
{ |
|
"epoch": 0.6981049256665204, |
|
"grad_norm": 1.5176235437393188, |
|
"learning_rate": 1.3813636861754464e-06, |
|
"loss": 1.6864, |
|
"step": 27435 |
|
}, |
|
{ |
|
"epoch": 0.6988937447915673, |
|
"grad_norm": 1.4501276016235352, |
|
"learning_rate": 1.3539671589655773e-06, |
|
"loss": 1.6941, |
|
"step": 27466 |
|
} |
|
], |
|
"logging_steps": 31, |
|
"max_steps": 30517, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 3052, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.037550548620044e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |