{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 3665,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0068212824010914054,
      "grad_norm": 2.718381452173294,
      "learning_rate": 1.3586956521739131e-06,
      "loss": 0.8554,
      "step": 5
    },
    {
      "epoch": 0.013642564802182811,
      "grad_norm": 1.8320824052298514,
      "learning_rate": 2.7173913043478263e-06,
      "loss": 0.8525,
      "step": 10
    },
    {
      "epoch": 0.020463847203274217,
      "grad_norm": 1.4237490955410819,
      "learning_rate": 4.07608695652174e-06,
      "loss": 0.8086,
      "step": 15
    },
    {
      "epoch": 0.027285129604365622,
      "grad_norm": 1.0670318844733424,
      "learning_rate": 5.4347826086956525e-06,
      "loss": 0.7639,
      "step": 20
    },
    {
      "epoch": 0.034106412005457026,
      "grad_norm": 0.8294384112181443,
      "learning_rate": 6.7934782608695655e-06,
      "loss": 0.7246,
      "step": 25
    },
    {
      "epoch": 0.040927694406548434,
      "grad_norm": 0.6498540769323535,
      "learning_rate": 8.15217391304348e-06,
      "loss": 0.6903,
      "step": 30
    },
    {
      "epoch": 0.047748976807639835,
      "grad_norm": 0.44534977628267997,
      "learning_rate": 9.510869565217392e-06,
      "loss": 0.6795,
      "step": 35
    },
    {
      "epoch": 0.054570259208731244,
      "grad_norm": 0.3942695256318386,
      "learning_rate": 1.0869565217391305e-05,
      "loss": 0.6436,
      "step": 40
    },
    {
      "epoch": 0.061391541609822645,
      "grad_norm": 0.33678552623662655,
      "learning_rate": 1.2228260869565218e-05,
      "loss": 0.6429,
      "step": 45
    },
    {
      "epoch": 0.06821282401091405,
      "grad_norm": 0.3279277406182261,
      "learning_rate": 1.3586956521739131e-05,
      "loss": 0.6403,
      "step": 50
    },
    {
      "epoch": 0.07503410641200546,
      "grad_norm": 0.3110307712217628,
      "learning_rate": 1.4945652173913044e-05,
      "loss": 0.6374,
      "step": 55
    },
    {
      "epoch": 0.08185538881309687,
      "grad_norm": 0.3155916087900725,
      "learning_rate": 1.630434782608696e-05,
      "loss": 0.6218,
      "step": 60
    },
    {
      "epoch": 0.08867667121418826,
      "grad_norm": 0.3402617489170636,
      "learning_rate": 1.766304347826087e-05,
      "loss": 0.6237,
      "step": 65
    },
    {
      "epoch": 0.09549795361527967,
      "grad_norm": 0.3613035423063458,
      "learning_rate": 1.9021739130434784e-05,
      "loss": 0.611,
      "step": 70
    },
    {
      "epoch": 0.10231923601637108,
      "grad_norm": 0.33685873327255084,
      "learning_rate": 2.0380434782608694e-05,
      "loss": 0.6119,
      "step": 75
    },
    {
      "epoch": 0.10914051841746249,
      "grad_norm": 0.31895725640857175,
      "learning_rate": 2.173913043478261e-05,
      "loss": 0.5929,
      "step": 80
    },
    {
      "epoch": 0.11596180081855388,
      "grad_norm": 0.367814337182358,
      "learning_rate": 2.3097826086956523e-05,
      "loss": 0.5847,
      "step": 85
    },
    {
      "epoch": 0.12278308321964529,
      "grad_norm": 0.33537099137809573,
      "learning_rate": 2.4456521739130436e-05,
      "loss": 0.6021,
      "step": 90
    },
    {
      "epoch": 0.1296043656207367,
      "grad_norm": 0.3515481694311979,
      "learning_rate": 2.5815217391304346e-05,
      "loss": 0.5971,
      "step": 95
    },
    {
      "epoch": 0.1364256480218281,
      "grad_norm": 0.48781555539445853,
      "learning_rate": 2.7173913043478262e-05,
      "loss": 0.5828,
      "step": 100
    },
    {
      "epoch": 0.1432469304229195,
      "grad_norm": 0.3695141232563311,
      "learning_rate": 2.8532608695652175e-05,
      "loss": 0.5839,
      "step": 105
    },
    {
      "epoch": 0.15006821282401092,
      "grad_norm": 0.3608533158391854,
      "learning_rate": 2.9891304347826088e-05,
      "loss": 0.587,
      "step": 110
    },
    {
      "epoch": 0.15688949522510232,
      "grad_norm": 0.42150696344097405,
      "learning_rate": 3.125e-05,
      "loss": 0.5848,
      "step": 115
    },
    {
      "epoch": 0.16371077762619374,
      "grad_norm": 0.4645864912756431,
      "learning_rate": 3.260869565217392e-05,
      "loss": 0.5733,
      "step": 120
    },
    {
      "epoch": 0.17053206002728513,
      "grad_norm": 0.48252326725187134,
      "learning_rate": 3.3967391304347826e-05,
      "loss": 0.5799,
      "step": 125
    },
    {
      "epoch": 0.17735334242837653,
      "grad_norm": 0.439342079352817,
      "learning_rate": 3.532608695652174e-05,
      "loss": 0.5754,
      "step": 130
    },
    {
      "epoch": 0.18417462482946795,
      "grad_norm": 0.5005585625374034,
      "learning_rate": 3.668478260869566e-05,
      "loss": 0.5744,
      "step": 135
    },
    {
      "epoch": 0.19099590723055934,
      "grad_norm": 0.5876076245892312,
      "learning_rate": 3.804347826086957e-05,
      "loss": 0.5871,
      "step": 140
    },
    {
      "epoch": 0.19781718963165076,
      "grad_norm": 0.5038018525075361,
      "learning_rate": 3.940217391304348e-05,
      "loss": 0.559,
      "step": 145
    },
    {
      "epoch": 0.20463847203274216,
      "grad_norm": 0.5317683287124427,
      "learning_rate": 4.076086956521739e-05,
      "loss": 0.5743,
      "step": 150
    },
    {
      "epoch": 0.21145975443383355,
      "grad_norm": 0.47414673284908293,
      "learning_rate": 4.2119565217391304e-05,
      "loss": 0.5582,
      "step": 155
    },
    {
      "epoch": 0.21828103683492497,
      "grad_norm": 0.5016540873040667,
      "learning_rate": 4.347826086956522e-05,
      "loss": 0.5667,
      "step": 160
    },
    {
      "epoch": 0.22510231923601637,
      "grad_norm": 0.4794691683623029,
      "learning_rate": 4.483695652173913e-05,
      "loss": 0.5696,
      "step": 165
    },
    {
      "epoch": 0.23192360163710776,
      "grad_norm": 0.6036530565440041,
      "learning_rate": 4.6195652173913046e-05,
      "loss": 0.5629,
      "step": 170
    },
    {
      "epoch": 0.23874488403819918,
      "grad_norm": 0.5121018480218417,
      "learning_rate": 4.7554347826086956e-05,
      "loss": 0.5774,
      "step": 175
    },
    {
      "epoch": 0.24556616643929058,
      "grad_norm": 0.5943036033467437,
      "learning_rate": 4.891304347826087e-05,
      "loss": 0.5574,
      "step": 180
    },
    {
      "epoch": 0.252387448840382,
      "grad_norm": 0.5932480324295518,
      "learning_rate": 4.999999083686275e-05,
      "loss": 0.5531,
      "step": 185
    },
    {
      "epoch": 0.2592087312414734,
      "grad_norm": 0.535232775932377,
      "learning_rate": 4.999967012784259e-05,
      "loss": 0.5638,
      "step": 190
    },
    {
      "epoch": 0.2660300136425648,
      "grad_norm": 0.4932300563298028,
      "learning_rate": 4.999889126942317e-05,
      "loss": 0.551,
      "step": 195
    },
    {
      "epoch": 0.2728512960436562,
      "grad_norm": 0.3724078808223032,
      "learning_rate": 4.999765427746401e-05,
      "loss": 0.5601,
      "step": 200
    },
    {
      "epoch": 0.27967257844474763,
      "grad_norm": 0.527609939821496,
      "learning_rate": 4.9995959177153344e-05,
      "loss": 0.5533,
      "step": 205
    },
    {
      "epoch": 0.286493860845839,
      "grad_norm": 0.48098555041424135,
      "learning_rate": 4.999380600300766e-05,
      "loss": 0.5543,
      "step": 210
    },
    {
      "epoch": 0.2933151432469304,
      "grad_norm": 0.3977697801311094,
      "learning_rate": 4.999119479887092e-05,
      "loss": 0.5572,
      "step": 215
    },
    {
      "epoch": 0.30013642564802184,
      "grad_norm": 0.4438001480487939,
      "learning_rate": 4.9988125617913766e-05,
      "loss": 0.5362,
      "step": 220
    },
    {
      "epoch": 0.3069577080491132,
      "grad_norm": 0.40138360501438736,
      "learning_rate": 4.998459852263239e-05,
      "loss": 0.5488,
      "step": 225
    },
    {
      "epoch": 0.31377899045020463,
      "grad_norm": 0.3935147485176112,
      "learning_rate": 4.9980613584847244e-05,
      "loss": 0.5443,
      "step": 230
    },
    {
      "epoch": 0.32060027285129605,
      "grad_norm": 0.47675495570547083,
      "learning_rate": 4.9976170885701596e-05,
      "loss": 0.5363,
      "step": 235
    },
    {
      "epoch": 0.3274215552523875,
      "grad_norm": 0.37900784050363756,
      "learning_rate": 4.9971270515659874e-05,
      "loss": 0.5489,
      "step": 240
    },
    {
      "epoch": 0.33424283765347884,
      "grad_norm": 0.43306874676755636,
      "learning_rate": 4.996591257450584e-05,
      "loss": 0.5575,
      "step": 245
    },
    {
      "epoch": 0.34106412005457026,
      "grad_norm": 0.45525589941001887,
      "learning_rate": 4.996009717134054e-05,
      "loss": 0.5394,
      "step": 250
    },
    {
      "epoch": 0.3478854024556617,
      "grad_norm": 0.36973267988621533,
      "learning_rate": 4.995382442458009e-05,
      "loss": 0.5404,
      "step": 255
    },
    {
      "epoch": 0.35470668485675305,
      "grad_norm": 0.3888515869412804,
      "learning_rate": 4.9947094461953255e-05,
      "loss": 0.5291,
      "step": 260
    },
    {
      "epoch": 0.3615279672578445,
      "grad_norm": 0.35113901228576483,
      "learning_rate": 4.993990742049886e-05,
      "loss": 0.5485,
      "step": 265
    },
    {
      "epoch": 0.3683492496589359,
      "grad_norm": 0.41740322629825416,
      "learning_rate": 4.9932263446562995e-05,
      "loss": 0.5374,
      "step": 270
    },
    {
      "epoch": 0.37517053206002726,
      "grad_norm": 0.4157253561819312,
      "learning_rate": 4.9924162695796016e-05,
      "loss": 0.5332,
      "step": 275
    },
    {
      "epoch": 0.3819918144611187,
      "grad_norm": 0.35820892208440913,
      "learning_rate": 4.991560533314944e-05,
      "loss": 0.539,
      "step": 280
    },
    {
      "epoch": 0.3888130968622101,
      "grad_norm": 0.4088803576788514,
      "learning_rate": 4.9906591532872496e-05,
      "loss": 0.5433,
      "step": 285
    },
    {
      "epoch": 0.3956343792633015,
      "grad_norm": 0.371678787656567,
      "learning_rate": 4.989712147850865e-05,
      "loss": 0.5389,
      "step": 290
    },
    {
      "epoch": 0.4024556616643929,
      "grad_norm": 0.582724495752912,
      "learning_rate": 4.988719536289182e-05,
      "loss": 0.5303,
      "step": 295
    },
    {
      "epoch": 0.4092769440654843,
      "grad_norm": 0.5039803164193325,
      "learning_rate": 4.9876813388142466e-05,
      "loss": 0.5299,
      "step": 300
    },
    {
      "epoch": 0.41609822646657574,
      "grad_norm": 0.45140868023849334,
      "learning_rate": 4.986597576566351e-05,
      "loss": 0.545,
      "step": 305
    },
    {
      "epoch": 0.4229195088676671,
      "grad_norm": 0.4153549634571551,
      "learning_rate": 4.9854682716135965e-05,
      "loss": 0.5421,
      "step": 310
    },
    {
      "epoch": 0.4297407912687585,
      "grad_norm": 0.39423844986474943,
      "learning_rate": 4.984293446951447e-05,
      "loss": 0.5363,
      "step": 315
    },
    {
      "epoch": 0.43656207366984995,
      "grad_norm": 0.38143699656601243,
      "learning_rate": 4.983073126502266e-05,
      "loss": 0.5366,
      "step": 320
    },
    {
      "epoch": 0.4433833560709413,
      "grad_norm": 0.40244690165803765,
      "learning_rate": 4.9818073351148184e-05,
      "loss": 0.5429,
      "step": 325
    },
    {
      "epoch": 0.45020463847203274,
      "grad_norm": 0.4415944055471395,
      "learning_rate": 4.9804960985637745e-05,
      "loss": 0.5304,
      "step": 330
    },
    {
      "epoch": 0.45702592087312416,
      "grad_norm": 0.3394607588175486,
      "learning_rate": 4.9791394435491815e-05,
      "loss": 0.5367,
      "step": 335
    },
    {
      "epoch": 0.4638472032742155,
      "grad_norm": 0.44036319608717406,
      "learning_rate": 4.977737397695919e-05,
      "loss": 0.5296,
      "step": 340
    },
    {
      "epoch": 0.47066848567530695,
      "grad_norm": 0.3612121871031905,
      "learning_rate": 4.9762899895531365e-05,
      "loss": 0.5226,
      "step": 345
    },
    {
      "epoch": 0.47748976807639837,
      "grad_norm": 0.4084070856561623,
      "learning_rate": 4.9747972485936746e-05,
      "loss": 0.5312,
      "step": 350
    },
    {
      "epoch": 0.4843110504774898,
      "grad_norm": 0.4187475634442714,
      "learning_rate": 4.973259205213461e-05,
      "loss": 0.5405,
      "step": 355
    },
    {
      "epoch": 0.49113233287858116,
      "grad_norm": 0.43222798441609056,
      "learning_rate": 4.971675890730897e-05,
      "loss": 0.5442,
      "step": 360
    },
    {
      "epoch": 0.4979536152796726,
      "grad_norm": 0.43270299261628004,
      "learning_rate": 4.9700473373862124e-05,
      "loss": 0.5219,
      "step": 365
    },
    {
      "epoch": 0.504774897680764,
      "grad_norm": 0.3734426909759304,
      "learning_rate": 4.9683735783408165e-05,
      "loss": 0.5238,
      "step": 370
    },
    {
      "epoch": 0.5115961800818554,
      "grad_norm": 0.41628380265626064,
      "learning_rate": 4.9666546476766164e-05,
      "loss": 0.5321,
      "step": 375
    },
    {
      "epoch": 0.5184174624829468,
      "grad_norm": 0.3923952948213395,
      "learning_rate": 4.9648905803953284e-05,
      "loss": 0.5182,
      "step": 380
    },
    {
      "epoch": 0.5252387448840382,
      "grad_norm": 0.31124417935955667,
      "learning_rate": 4.963081412417762e-05,
      "loss": 0.5277,
      "step": 385
    },
    {
      "epoch": 0.5320600272851296,
      "grad_norm": 0.359338085196968,
      "learning_rate": 4.961227180583089e-05,
      "loss": 0.5304,
      "step": 390
    },
    {
      "epoch": 0.538881309686221,
      "grad_norm": 0.333204967843598,
      "learning_rate": 4.9593279226480944e-05,
      "loss": 0.5139,
      "step": 395
    },
    {
      "epoch": 0.5457025920873124,
      "grad_norm": 0.4730935475590952,
      "learning_rate": 4.9573836772864074e-05,
      "loss": 0.5361,
      "step": 400
    },
    {
      "epoch": 0.5525238744884038,
      "grad_norm": 0.44098956286299457,
      "learning_rate": 4.955394484087711e-05,
      "loss": 0.5343,
      "step": 405
    },
    {
      "epoch": 0.5593451568894953,
      "grad_norm": 0.42042925676970927,
      "learning_rate": 4.953360383556941e-05,
      "loss": 0.5193,
      "step": 410
    },
    {
      "epoch": 0.5661664392905866,
      "grad_norm": 0.42110351706341315,
      "learning_rate": 4.951281417113457e-05,
      "loss": 0.5245,
      "step": 415
    },
    {
      "epoch": 0.572987721691678,
      "grad_norm": 0.3695438489534778,
      "learning_rate": 4.9491576270902e-05,
      "loss": 0.5241,
      "step": 420
    },
    {
      "epoch": 0.5798090040927695,
      "grad_norm": 0.41403931778963793,
      "learning_rate": 4.946989056732833e-05,
      "loss": 0.5192,
      "step": 425
    },
    {
      "epoch": 0.5866302864938608,
      "grad_norm": 0.3982973651072619,
      "learning_rate": 4.944775750198858e-05,
      "loss": 0.524,
      "step": 430
    },
    {
      "epoch": 0.5934515688949522,
      "grad_norm": 0.42595084178546927,
      "learning_rate": 4.942517752556714e-05,
      "loss": 0.5134,
      "step": 435
    },
    {
      "epoch": 0.6002728512960437,
      "grad_norm": 0.35556353177455896,
      "learning_rate": 4.940215109784865e-05,
      "loss": 0.5109,
      "step": 440
    },
    {
      "epoch": 0.607094133697135,
      "grad_norm": 0.3420291981554025,
      "learning_rate": 4.937867868770862e-05,
      "loss": 0.5238,
      "step": 445
    },
    {
      "epoch": 0.6139154160982264,
      "grad_norm": 0.4013278641302897,
      "learning_rate": 4.9354760773103845e-05,
      "loss": 0.5225,
      "step": 450
    },
    {
      "epoch": 0.6207366984993179,
      "grad_norm": 0.3606171148062443,
      "learning_rate": 4.933039784106272e-05,
      "loss": 0.5259,
      "step": 455
    },
    {
      "epoch": 0.6275579809004093,
      "grad_norm": 0.3381144814134007,
      "learning_rate": 4.930559038767532e-05,
      "loss": 0.5194,
      "step": 460
    },
    {
      "epoch": 0.6343792633015006,
      "grad_norm": 0.391843344180771,
      "learning_rate": 4.9280338918083264e-05,
      "loss": 0.5139,
      "step": 465
    },
    {
      "epoch": 0.6412005457025921,
      "grad_norm": 0.36206934317906436,
      "learning_rate": 4.925464394646944e-05,
      "loss": 0.5325,
      "step": 470
    },
    {
      "epoch": 0.6480218281036835,
      "grad_norm": 0.3433338943637673,
      "learning_rate": 4.922850599604756e-05,
      "loss": 0.5085,
      "step": 475
    },
    {
      "epoch": 0.654843110504775,
      "grad_norm": 0.3299742405744191,
      "learning_rate": 4.920192559905149e-05,
      "loss": 0.5108,
      "step": 480
    },
    {
      "epoch": 0.6616643929058663,
      "grad_norm": 0.4134988825523861,
      "learning_rate": 4.9174903296724394e-05,
      "loss": 0.5249,
      "step": 485
    },
    {
      "epoch": 0.6684856753069577,
      "grad_norm": 0.37990282467605163,
      "learning_rate": 4.914743963930775e-05,
      "loss": 0.5102,
      "step": 490
    },
    {
      "epoch": 0.6753069577080492,
      "grad_norm": 0.399202229699515,
      "learning_rate": 4.911953518603012e-05,
      "loss": 0.5031,
      "step": 495
    },
    {
      "epoch": 0.6821282401091405,
      "grad_norm": 0.4466818320853147,
      "learning_rate": 4.909119050509576e-05,
      "loss": 0.5077,
      "step": 500
    },
    {
      "epoch": 0.6889495225102319,
      "grad_norm": 0.3878038281442584,
      "learning_rate": 4.906240617367308e-05,
      "loss": 0.5163,
      "step": 505
    },
    {
      "epoch": 0.6957708049113234,
      "grad_norm": 0.46953615879792254,
      "learning_rate": 4.9033182777882845e-05,
      "loss": 0.5136,
      "step": 510
    },
    {
      "epoch": 0.7025920873124147,
      "grad_norm": 0.43602888466155715,
      "learning_rate": 4.9003520912786286e-05,
      "loss": 0.518,
      "step": 515
    },
    {
      "epoch": 0.7094133697135061,
      "grad_norm": 0.39296876089094146,
      "learning_rate": 4.8973421182372955e-05,
      "loss": 0.5298,
      "step": 520
    },
    {
      "epoch": 0.7162346521145976,
      "grad_norm": 0.4385411631704965,
      "learning_rate": 4.8942884199548424e-05,
      "loss": 0.5142,
      "step": 525
    },
    {
      "epoch": 0.723055934515689,
      "grad_norm": 0.3690695281664004,
      "learning_rate": 4.891191058612184e-05,
      "loss": 0.5078,
      "step": 530
    },
    {
      "epoch": 0.7298772169167803,
      "grad_norm": 0.38913526291124273,
      "learning_rate": 4.8880500972793204e-05,
      "loss": 0.5134,
      "step": 535
    },
    {
      "epoch": 0.7366984993178718,
      "grad_norm": 0.3579203031993548,
      "learning_rate": 4.88486559991406e-05,
      "loss": 0.5185,
      "step": 540
    },
    {
      "epoch": 0.7435197817189632,
      "grad_norm": 0.3437909123481529,
      "learning_rate": 4.8816376313607095e-05,
      "loss": 0.5218,
      "step": 545
    },
    {
      "epoch": 0.7503410641200545,
      "grad_norm": 0.34425300375670553,
      "learning_rate": 4.878366257348761e-05,
      "loss": 0.5063,
      "step": 550
    },
    {
      "epoch": 0.757162346521146,
      "grad_norm": 0.37603307112189627,
      "learning_rate": 4.8750515444915475e-05,
      "loss": 0.5067,
      "step": 555
    },
    {
      "epoch": 0.7639836289222374,
      "grad_norm": 0.42138974150775454,
      "learning_rate": 4.8716935602848904e-05,
      "loss": 0.5025,
      "step": 560
    },
    {
      "epoch": 0.7708049113233287,
      "grad_norm": 0.46990974803356833,
      "learning_rate": 4.868292373105722e-05,
      "loss": 0.5156,
      "step": 565
    },
    {
      "epoch": 0.7776261937244202,
      "grad_norm": 0.4146567827090065,
      "learning_rate": 4.8648480522106974e-05,
      "loss": 0.5064,
      "step": 570
    },
    {
      "epoch": 0.7844474761255116,
      "grad_norm": 0.37240757378463407,
      "learning_rate": 4.8613606677347794e-05,
      "loss": 0.506,
      "step": 575
    },
    {
      "epoch": 0.791268758526603,
      "grad_norm": 0.4262453048825825,
      "learning_rate": 4.857830290689814e-05,
      "loss": 0.5209,
      "step": 580
    },
    {
      "epoch": 0.7980900409276944,
      "grad_norm": 0.4373383547064571,
      "learning_rate": 4.8542569929630844e-05,
      "loss": 0.5081,
      "step": 585
    },
    {
      "epoch": 0.8049113233287858,
      "grad_norm": 0.44110437145532355,
      "learning_rate": 4.8506408473158414e-05,
      "loss": 0.522,
      "step": 590
    },
    {
      "epoch": 0.8117326057298773,
      "grad_norm": 0.4173217160585339,
      "learning_rate": 4.8469819273818315e-05,
      "loss": 0.5074,
      "step": 595
    },
    {
      "epoch": 0.8185538881309686,
      "grad_norm": 0.44658170498127164,
      "learning_rate": 4.843280307665788e-05,
      "loss": 0.5052,
      "step": 600
    },
    {
      "epoch": 0.82537517053206,
      "grad_norm": 0.3857834259517951,
      "learning_rate": 4.8395360635419226e-05,
      "loss": 0.5109,
      "step": 605
    },
    {
      "epoch": 0.8321964529331515,
      "grad_norm": 0.3505595634781613,
      "learning_rate": 4.835749271252383e-05,
      "loss": 0.5048,
      "step": 610
    },
    {
      "epoch": 0.8390177353342428,
      "grad_norm": 0.4486071997949835,
      "learning_rate": 4.8319200079057044e-05,
      "loss": 0.502,
      "step": 615
    },
    {
      "epoch": 0.8458390177353342,
      "grad_norm": 0.36540477393499693,
      "learning_rate": 4.828048351475239e-05,
      "loss": 0.4994,
      "step": 620
    },
    {
      "epoch": 0.8526603001364257,
      "grad_norm": 0.3535891851423872,
      "learning_rate": 4.824134380797568e-05,
      "loss": 0.5156,
      "step": 625
    },
    {
      "epoch": 0.859481582537517,
      "grad_norm": 0.3564989100834705,
      "learning_rate": 4.820178175570897e-05,
      "loss": 0.5145,
      "step": 630
    },
    {
      "epoch": 0.8663028649386084,
      "grad_norm": 0.34049206381140384,
      "learning_rate": 4.81617981635343e-05,
      "loss": 0.5124,
      "step": 635
    },
    {
      "epoch": 0.8731241473396999,
      "grad_norm": 0.322576632173292,
      "learning_rate": 4.8121393845617336e-05,
      "loss": 0.4972,
      "step": 640
    },
    {
      "epoch": 0.8799454297407913,
      "grad_norm": 0.35074887766341006,
      "learning_rate": 4.808056962469076e-05,
      "loss": 0.5005,
      "step": 645
    },
    {
      "epoch": 0.8867667121418826,
      "grad_norm": 0.37244533491828524,
      "learning_rate": 4.803932633203753e-05,
      "loss": 0.508,
      "step": 650
    },
    {
      "epoch": 0.8935879945429741,
      "grad_norm": 0.3439172116866752,
      "learning_rate": 4.799766480747394e-05,
      "loss": 0.5027,
      "step": 655
    },
    {
      "epoch": 0.9004092769440655,
      "grad_norm": 0.4355482554835195,
      "learning_rate": 4.795558589933254e-05,
      "loss": 0.5067,
      "step": 660
    },
    {
      "epoch": 0.9072305593451568,
      "grad_norm": 0.4870363512314333,
      "learning_rate": 4.791309046444485e-05,
      "loss": 0.5029,
      "step": 665
    },
    {
      "epoch": 0.9140518417462483,
      "grad_norm": 0.4120257961084212,
      "learning_rate": 4.787017936812391e-05,
      "loss": 0.5097,
      "step": 670
    },
    {
      "epoch": 0.9208731241473397,
      "grad_norm": 0.37936056411557967,
      "learning_rate": 4.782685348414666e-05,
      "loss": 0.5002,
      "step": 675
    },
    {
      "epoch": 0.927694406548431,
      "grad_norm": 0.4164234019796205,
      "learning_rate": 4.7783113694736155e-05,
      "loss": 0.5095,
      "step": 680
    },
    {
      "epoch": 0.9345156889495225,
      "grad_norm": 0.41384348002400084,
      "learning_rate": 4.77389608905436e-05,
      "loss": 0.4951,
      "step": 685
    },
    {
      "epoch": 0.9413369713506139,
      "grad_norm": 0.4758845307523666,
      "learning_rate": 4.769439597063021e-05,
      "loss": 0.5085,
      "step": 690
    },
    {
      "epoch": 0.9481582537517054,
      "grad_norm": 0.3574461311161845,
      "learning_rate": 4.7649419842448897e-05,
      "loss": 0.5059,
      "step": 695
    },
    {
      "epoch": 0.9549795361527967,
      "grad_norm": 0.3495938838243895,
      "learning_rate": 4.76040334218258e-05,
      "loss": 0.5081,
      "step": 700
    },
    {
      "epoch": 0.9618008185538881,
      "grad_norm": 0.4378709381568982,
      "learning_rate": 4.755823763294165e-05,
      "loss": 0.4918,
      "step": 705
    },
    {
      "epoch": 0.9686221009549796,
      "grad_norm": 0.29336875522442324,
      "learning_rate": 4.751203340831293e-05,
      "loss": 0.5024,
      "step": 710
    },
    {
      "epoch": 0.975443383356071,
      "grad_norm": 0.36977219152146296,
      "learning_rate": 4.746542168877286e-05,
      "loss": 0.5134,
      "step": 715
    },
    {
      "epoch": 0.9822646657571623,
      "grad_norm": 0.3291235462155399,
      "learning_rate": 4.741840342345234e-05,
      "loss": 0.4992,
      "step": 720
    },
    {
      "epoch": 0.9890859481582538,
      "grad_norm": 0.3371844589918567,
      "learning_rate": 4.7370979569760487e-05,
      "loss": 0.4955,
      "step": 725
    },
    {
      "epoch": 0.9959072305593452,
      "grad_norm": 0.38097927189872294,
      "learning_rate": 4.732315109336526e-05,
      "loss": 0.5028,
      "step": 730
    },
    {
      "epoch": 1.0027285129604366,
      "grad_norm": 0.3827764202163699,
      "learning_rate": 4.7274918968173715e-05,
      "loss": 0.4903,
      "step": 735
    },
    {
      "epoch": 1.009549795361528,
      "grad_norm": 0.3823197870028838,
      "learning_rate": 4.722628417631222e-05,
      "loss": 0.4698,
      "step": 740
    },
    {
      "epoch": 1.0163710777626194,
      "grad_norm": 0.49050241814536016,
      "learning_rate": 4.717724770810644e-05,
      "loss": 0.4784,
      "step": 745
    },
    {
      "epoch": 1.0231923601637107,
      "grad_norm": 0.42959343836399067,
      "learning_rate": 4.712781056206115e-05,
      "loss": 0.4784,
      "step": 750
    },
    {
      "epoch": 1.030013642564802,
      "grad_norm": 0.33192677798145853,
      "learning_rate": 4.707797374483995e-05,
      "loss": 0.4688,
      "step": 755
    },
    {
      "epoch": 1.0368349249658937,
      "grad_norm": 0.33677259823116745,
      "learning_rate": 4.7027738271244745e-05,
      "loss": 0.4709,
      "step": 760
    },
    {
      "epoch": 1.043656207366985,
      "grad_norm": 0.3587520916498671,
      "learning_rate": 4.697710516419506e-05,
      "loss": 0.4732,
      "step": 765
    },
    {
      "epoch": 1.0504774897680764,
      "grad_norm": 0.3052981614611353,
      "learning_rate": 4.692607545470724e-05,
      "loss": 0.4819,
      "step": 770
    },
    {
      "epoch": 1.0572987721691678,
      "grad_norm": 0.3082531398269355,
      "learning_rate": 4.6874650181873434e-05,
      "loss": 0.4621,
      "step": 775
    },
    {
      "epoch": 1.0641200545702592,
      "grad_norm": 0.3293876714456996,
      "learning_rate": 4.6822830392840454e-05,
      "loss": 0.4692,
      "step": 780
    },
    {
      "epoch": 1.0709413369713505,
      "grad_norm": 0.2874139325705941,
      "learning_rate": 4.677061714278845e-05,
      "loss": 0.4739,
      "step": 785
    },
    {
      "epoch": 1.077762619372442,
      "grad_norm": 0.3172277958685006,
      "learning_rate": 4.671801149490942e-05,
      "loss": 0.476,
      "step": 790
    },
    {
      "epoch": 1.0845839017735335,
      "grad_norm": 0.3085088466565494,
      "learning_rate": 4.666501452038555e-05,
      "loss": 0.4688,
      "step": 795
    },
    {
      "epoch": 1.0914051841746248,
      "grad_norm": 0.3314327355813381,
      "learning_rate": 4.661162729836742e-05,
      "loss": 0.4714,
      "step": 800
    },
    {
      "epoch": 1.0982264665757162,
      "grad_norm": 0.4003761744144885,
      "learning_rate": 4.655785091595203e-05,
      "loss": 0.4696,
      "step": 805
    },
    {
      "epoch": 1.1050477489768076,
      "grad_norm": 0.351393239914098,
      "learning_rate": 4.650368646816063e-05,
      "loss": 0.4677,
      "step": 810
    },
    {
      "epoch": 1.111869031377899,
      "grad_norm": 0.35762582610575827,
      "learning_rate": 4.644913505791648e-05,
      "loss": 0.4659,
      "step": 815
    },
    {
      "epoch": 1.1186903137789905,
      "grad_norm": 0.315908724272643,
      "learning_rate": 4.639419779602234e-05,
      "loss": 0.4786,
      "step": 820
    },
    {
      "epoch": 1.125511596180082,
      "grad_norm": 0.3323051669415947,
      "learning_rate": 4.633887580113788e-05,
      "loss": 0.4673,
      "step": 825
    },
    {
      "epoch": 1.1323328785811733,
      "grad_norm": 0.3416812819537007,
      "learning_rate": 4.62831701997569e-05,
      "loss": 0.4626,
      "step": 830
    },
    {
      "epoch": 1.1391541609822646,
      "grad_norm": 0.3346367428692832,
      "learning_rate": 4.622708212618436e-05,
      "loss": 0.4707,
      "step": 835
    },
    {
      "epoch": 1.145975443383356,
      "grad_norm": 0.38356789979052214,
      "learning_rate": 4.617061272251334e-05,
      "loss": 0.4705,
      "step": 840
    },
    {
      "epoch": 1.1527967257844476,
      "grad_norm": 0.3189849965788692,
      "learning_rate": 4.6113763138601733e-05,
      "loss": 0.4756,
      "step": 845
    },
    {
      "epoch": 1.159618008185539,
      "grad_norm": 0.4417765040372437,
      "learning_rate": 4.605653453204885e-05,
      "loss": 0.4686,
      "step": 850
    },
    {
      "epoch": 1.1664392905866303,
      "grad_norm": 0.37245869543006016,
      "learning_rate": 4.5998928068171855e-05,
      "loss": 0.4784,
      "step": 855
    },
    {
      "epoch": 1.1732605729877217,
      "grad_norm": 0.4203605877476329,
      "learning_rate": 4.594094491998202e-05,
      "loss": 0.4737,
      "step": 860
    },
    {
      "epoch": 1.180081855388813,
      "grad_norm": 0.36494183086977755,
      "learning_rate": 4.588258626816087e-05,
      "loss": 0.4689,
      "step": 865
    },
    {
      "epoch": 1.1869031377899044,
      "grad_norm": 0.40333895829909894,
      "learning_rate": 4.582385330103609e-05,
      "loss": 0.4771,
      "step": 870
    },
    {
      "epoch": 1.1937244201909958,
      "grad_norm": 0.3901597877953876,
      "learning_rate": 4.576474721455738e-05,
      "loss": 0.4751,
      "step": 875
    },
    {
      "epoch": 1.2005457025920874,
      "grad_norm": 0.34277800089770916,
      "learning_rate": 4.570526921227208e-05,
      "loss": 0.4666,
      "step": 880
    },
    {
      "epoch": 1.2073669849931787,
      "grad_norm": 0.39138074126943645,
      "learning_rate": 4.564542050530065e-05,
      "loss": 0.465,
      "step": 885
    },
    {
      "epoch": 1.21418826739427,
      "grad_norm": 0.29677610423800593,
      "learning_rate": 4.558520231231203e-05,
      "loss": 0.4809,
      "step": 890
    },
    {
      "epoch": 1.2210095497953615,
      "grad_norm": 1.986257352752189,
      "learning_rate": 4.552461585949882e-05,
      "loss": 0.4729,
      "step": 895
    },
    {
      "epoch": 1.2278308321964528,
      "grad_norm": 0.31433424596600806,
      "learning_rate": 4.5463662380552305e-05,
      "loss": 0.4691,
      "step": 900
    },
    {
      "epoch": 1.2346521145975444,
      "grad_norm": 0.33462141961486536,
      "learning_rate": 4.540234311663733e-05,
      "loss": 0.479,
      "step": 905
    },
    {
      "epoch": 1.2414733969986358,
      "grad_norm": 0.3618725063175746,
      "learning_rate": 4.5340659316367076e-05,
      "loss": 0.4704,
      "step": 910
    },
    {
      "epoch": 1.2482946793997272,
      "grad_norm": 0.3153025248788844,
      "learning_rate": 4.5278612235777506e-05,
      "loss": 0.4576,
      "step": 915
    },
    {
      "epoch": 1.2551159618008185,
      "grad_norm": 0.4061352898494509,
      "learning_rate": 4.5216203138301965e-05,
      "loss": 0.4522,
      "step": 920
    },
    {
      "epoch": 1.26193724420191,
      "grad_norm": 0.3607694811238664,
      "learning_rate": 4.515343329474533e-05,
      "loss": 0.4706,
      "step": 925
    },
    {
      "epoch": 1.2687585266030013,
      "grad_norm": 0.3267827363161734,
      "learning_rate": 4.5090303983258145e-05,
      "loss": 0.468,
      "step": 930
    },
    {
      "epoch": 1.2755798090040928,
      "grad_norm": 0.27843919573690984,
      "learning_rate": 4.5026816489310663e-05,
      "loss": 0.4661,
      "step": 935
    },
    {
      "epoch": 1.2824010914051842,
      "grad_norm": 0.34060835289474956,
      "learning_rate": 4.4962972105666594e-05,
      "loss": 0.4629,
      "step": 940
    },
    {
      "epoch": 1.2892223738062756,
      "grad_norm": 0.37337450694564905,
      "learning_rate": 4.4898772132356814e-05,
      "loss": 0.4659,
      "step": 945
    },
    {
      "epoch": 1.296043656207367,
      "grad_norm": 0.3889912662850152,
      "learning_rate": 4.48342178766529e-05,
      "loss": 0.4754,
      "step": 950
    },
    {
      "epoch": 1.3028649386084583,
      "grad_norm": 0.31158287570845944,
      "learning_rate": 4.476931065304051e-05,
      "loss": 0.4807,
      "step": 955
    },
    {
      "epoch": 1.30968622100955,
      "grad_norm": 0.2766567733359623,
      "learning_rate": 4.4704051783192586e-05,
      "loss": 0.464,
      "step": 960
    },
    {
      "epoch": 1.3165075034106413,
      "grad_norm": 0.3433197368032259,
      "learning_rate": 4.463844259594248e-05,
      "loss": 0.4752,
      "step": 965
    },
    {
      "epoch": 1.3233287858117326,
      "grad_norm": 0.318333371191306,
      "learning_rate": 4.457248442725689e-05,
      "loss": 0.475,
      "step": 970
    },
    {
      "epoch": 1.330150068212824,
      "grad_norm": 0.31343360813925925,
      "learning_rate": 4.450617862020863e-05,
      "loss": 0.4672,
      "step": 975
    },
    {
      "epoch": 1.3369713506139154,
      "grad_norm": 0.39934458201682166,
      "learning_rate": 4.4439526524949284e-05,
      "loss": 0.4667,
      "step": 980
    },
    {
      "epoch": 1.3437926330150067,
      "grad_norm": 0.35542044713329446,
      "learning_rate": 4.4372529498681766e-05,
      "loss": 0.4714,
      "step": 985
    },
    {
      "epoch": 1.350613915416098,
      "grad_norm": 0.35992889764639413,
      "learning_rate": 4.430518890563261e-05,
      "loss": 0.471,
      "step": 990
    },
    {
      "epoch": 1.3574351978171897,
      "grad_norm": 0.3560037441904099,
      "learning_rate": 4.423750611702426e-05,
      "loss": 0.4623,
      "step": 995
    },
    {
      "epoch": 1.364256480218281,
      "grad_norm": 0.33206985127339195,
      "learning_rate": 4.416948251104707e-05,
      "loss": 0.4682,
      "step": 1000
    },
    {
      "epoch": 1.3710777626193724,
      "grad_norm": 0.31074855188770295,
      "learning_rate": 4.4101119472831344e-05,
      "loss": 0.4678,
      "step": 1005
    },
    {
      "epoch": 1.3778990450204638,
      "grad_norm": 0.31053961787527984,
      "learning_rate": 4.403241839441901e-05,
      "loss": 0.4688,
      "step": 1010
    },
    {
      "epoch": 1.3847203274215554,
      "grad_norm": 0.3152896675317222,
      "learning_rate": 4.39633806747354e-05,
      "loss": 0.4754,
      "step": 1015
    },
    {
      "epoch": 1.3915416098226467,
      "grad_norm": 0.30982743776045496,
      "learning_rate": 4.389400771956065e-05,
      "loss": 0.4628,
      "step": 1020
    },
    {
      "epoch": 1.398362892223738,
      "grad_norm": 0.28953578544241515,
      "learning_rate": 4.382430094150115e-05,
      "loss": 0.4649,
      "step": 1025
    },
    {
      "epoch": 1.4051841746248295,
      "grad_norm": 0.31445930491885443,
      "learning_rate": 4.3754261759960754e-05,
      "loss": 0.4667,
      "step": 1030
    },
    {
      "epoch": 1.4120054570259208,
      "grad_norm": 0.35215307464128237,
      "learning_rate": 4.3683891601111885e-05,
      "loss": 0.4727,
      "step": 1035
    },
    {
      "epoch": 1.4188267394270122,
      "grad_norm": 0.32253563108413785,
      "learning_rate": 4.3613191897866484e-05,
      "loss": 0.4672,
      "step": 1040
    },
    {
      "epoch": 1.4256480218281036,
      "grad_norm": 0.3679199412491207,
      "learning_rate": 4.354216408984683e-05,
      "loss": 0.4671,
      "step": 1045
    },
    {
      "epoch": 1.4324693042291952,
      "grad_norm": 0.33159929778862235,
      "learning_rate": 4.3470809623356254e-05,
      "loss": 0.4574,
      "step": 1050
    },
    {
      "epoch": 1.4392905866302865,
      "grad_norm": 0.32071774142054066,
      "learning_rate": 4.3399129951349644e-05,
      "loss": 0.4679,
      "step": 1055
    },
    {
      "epoch": 1.446111869031378,
      "grad_norm": 0.33875938116054005,
      "learning_rate": 4.3327126533403906e-05,
      "loss": 0.4667,
      "step": 1060
    },
    {
      "epoch": 1.4529331514324693,
      "grad_norm": 0.32046481852129444,
      "learning_rate": 4.3254800835688206e-05,
      "loss": 0.4664,
      "step": 1065
    },
    {
      "epoch": 1.4597544338335606,
      "grad_norm": 0.3671443933349803,
      "learning_rate": 4.318215433093412e-05,
      "loss": 0.4636,
      "step": 1070
    },
    {
      "epoch": 1.4665757162346522,
      "grad_norm": 0.3877193573168454,
      "learning_rate": 4.310918849840568e-05,
      "loss": 0.4636,
      "step": 1075
    },
    {
      "epoch": 1.4733969986357436,
      "grad_norm": 0.37738393252018904,
      "learning_rate": 4.3035904823869236e-05,
      "loss": 0.4616,
      "step": 1080
    },
    {
      "epoch": 1.480218281036835,
      "grad_norm": 0.3911245540247714,
      "learning_rate": 4.2962304799563145e-05,
      "loss": 0.4676,
      "step": 1085
    },
    {
      "epoch": 1.4870395634379263,
      "grad_norm": 0.3700102178404319,
      "learning_rate": 4.2888389924167485e-05,
      "loss": 0.4657,
      "step": 1090
    },
    {
      "epoch": 1.4938608458390177,
      "grad_norm": 0.33699884554276954,
      "learning_rate": 4.2814161702773445e-05,
      "loss": 0.4765,
      "step": 1095
    },
    {
      "epoch": 1.500682128240109,
      "grad_norm": 0.3241464736592151,
      "learning_rate": 4.273962164685277e-05,
      "loss": 0.4707,
      "step": 1100
    },
    {
      "epoch": 1.5075034106412004,
      "grad_norm": 0.36575583201384765,
      "learning_rate": 4.266477127422689e-05,
      "loss": 0.461,
      "step": 1105
    },
    {
      "epoch": 1.514324693042292,
      "grad_norm": 0.3186365429573977,
      "learning_rate": 4.258961210903607e-05,
      "loss": 0.4669,
      "step": 1110
    },
    {
      "epoch": 1.5211459754433834,
      "grad_norm": 0.4112679503825286,
      "learning_rate": 4.251414568170837e-05,
      "loss": 0.4662,
      "step": 1115
    },
    {
      "epoch": 1.5279672578444747,
      "grad_norm": 0.4022859807157274,
      "learning_rate": 4.243837352892847e-05,
      "loss": 0.4657,
      "step": 1120
    },
    {
      "epoch": 1.5347885402455663,
      "grad_norm": 0.3204521643007053,
      "learning_rate": 4.236229719360637e-05,
      "loss": 0.4742,
      "step": 1125
    },
    {
      "epoch": 1.5416098226466577,
      "grad_norm": 0.48450770190539705,
      "learning_rate": 4.2285918224846004e-05,
      "loss": 0.4648,
      "step": 1130
    },
    {
      "epoch": 1.548431105047749,
      "grad_norm": 0.38815781737547445,
      "learning_rate": 4.220923817791368e-05,
      "loss": 0.4695,
      "step": 1135
    },
    {
      "epoch": 1.5552523874488404,
      "grad_norm": 0.35417533551611025,
      "learning_rate": 4.213225861420638e-05,
      "loss": 0.4788,
      "step": 1140
    },
    {
      "epoch": 1.5620736698499318,
      "grad_norm": 0.33621340683762574,
      "learning_rate": 4.205498110122001e-05,
      "loss": 0.468,
      "step": 1145
    },
    {
      "epoch": 1.5688949522510232,
      "grad_norm": 0.3051392387726156,
      "learning_rate": 4.1977407212517485e-05,
      "loss": 0.4672,
      "step": 1150
    },
    {
      "epoch": 1.5757162346521145,
      "grad_norm": 0.30403627574032777,
      "learning_rate": 4.1899538527696645e-05,
      "loss": 0.4709,
      "step": 1155
    },
    {
      "epoch": 1.5825375170532059,
      "grad_norm": 0.31580261281637434,
      "learning_rate": 4.1821376632358125e-05,
      "loss": 0.4875,
      "step": 1160
    },
    {
      "epoch": 1.5893587994542973,
      "grad_norm": 0.3395866286207179,
      "learning_rate": 4.174292311807305e-05,
      "loss": 0.4585,
      "step": 1165
    },
    {
      "epoch": 1.5961800818553888,
      "grad_norm": 0.31931572920688767,
      "learning_rate": 4.166417958235064e-05,
      "loss": 0.4627,
      "step": 1170
    },
    {
      "epoch": 1.6030013642564802,
      "grad_norm": 0.2678192239884723,
      "learning_rate": 4.158514762860567e-05,
      "loss": 0.4636,
      "step": 1175
    },
    {
      "epoch": 1.6098226466575716,
      "grad_norm": 0.30945877917388176,
      "learning_rate": 4.150582886612583e-05,
      "loss": 0.4675,
      "step": 1180
    },
    {
      "epoch": 1.6166439290586632,
      "grad_norm": 0.3018968259724802,
      "learning_rate": 4.142622491003895e-05,
      "loss": 0.4654,
      "step": 1185
    },
    {
      "epoch": 1.6234652114597545,
      "grad_norm": 0.34852848169490774,
      "learning_rate": 4.134633738128011e-05,
      "loss": 0.4693,
      "step": 1190
    },
    {
      "epoch": 1.630286493860846,
      "grad_norm": 0.32787903172740046,
      "learning_rate": 4.1266167906558666e-05,
      "loss": 0.4626,
      "step": 1195
    },
    {
      "epoch": 1.6371077762619373,
      "grad_norm": 0.31265931725066487,
      "learning_rate": 4.118571811832503e-05,
      "loss": 0.4654,
      "step": 1200
    },
    {
      "epoch": 1.6439290586630286,
      "grad_norm": 0.2993073436351214,
      "learning_rate": 4.110498965473755e-05,
      "loss": 0.4554,
      "step": 1205
    },
    {
      "epoch": 1.65075034106412,
      "grad_norm": 0.36553912187903026,
      "learning_rate": 4.10239841596291e-05,
      "loss": 0.4675,
      "step": 1210
    },
    {
      "epoch": 1.6575716234652114,
      "grad_norm": 0.31918331042181586,
      "learning_rate": 4.094270328247358e-05,
      "loss": 0.4602,
      "step": 1215
    },
    {
      "epoch": 1.6643929058663027,
      "grad_norm": 0.28081409466017554,
      "learning_rate": 4.0861148678352365e-05,
      "loss": 0.4527,
      "step": 1220
    },
    {
      "epoch": 1.6712141882673943,
      "grad_norm": 0.27849601812181274,
      "learning_rate": 4.07793220079206e-05,
      "loss": 0.4569,
      "step": 1225
    },
    {
      "epoch": 1.6780354706684857,
      "grad_norm": 0.33295439455092557,
      "learning_rate": 4.0697224937373395e-05,
      "loss": 0.46,
      "step": 1230
    },
    {
      "epoch": 1.684856753069577,
      "grad_norm": 0.3505769876040763,
      "learning_rate": 4.0614859138411835e-05,
      "loss": 0.468,
      "step": 1235
    },
    {
      "epoch": 1.6916780354706686,
      "grad_norm": 0.39217466352691527,
      "learning_rate": 4.053222628820902e-05,
      "loss": 0.4578,
      "step": 1240
    },
    {
      "epoch": 1.69849931787176,
      "grad_norm": 0.30143581574850914,
      "learning_rate": 4.044932806937587e-05,
      "loss": 0.4575,
      "step": 1245
    },
    {
      "epoch": 1.7053206002728514,
      "grad_norm": 0.31505555216878045,
      "learning_rate": 4.036616616992688e-05,
      "loss": 0.4644,
      "step": 1250
    },
    {
      "epoch": 1.7121418826739427,
      "grad_norm": 0.2883792300853787,
      "learning_rate": 4.0282742283245725e-05,
      "loss": 0.4586,
      "step": 1255
    },
    {
      "epoch": 1.718963165075034,
      "grad_norm": 0.2908471999120709,
      "learning_rate": 4.0199058108050793e-05,
      "loss": 0.4552,
      "step": 1260
    },
    {
      "epoch": 1.7257844474761255,
      "grad_norm": 0.32328729724020944,
      "learning_rate": 4.0115115348360635e-05,
      "loss": 0.4606,
      "step": 1265
    },
    {
      "epoch": 1.7326057298772168,
      "grad_norm": 0.3751122636763178,
      "learning_rate": 4.003091571345917e-05,
      "loss": 0.4446,
      "step": 1270
    },
    {
      "epoch": 1.7394270122783082,
      "grad_norm": 0.27616502281371696,
      "learning_rate": 3.994646091786097e-05,
      "loss": 0.4715,
      "step": 1275
    },
    {
      "epoch": 1.7462482946793996,
      "grad_norm": 0.30232993983843404,
      "learning_rate": 3.9861752681276305e-05,
      "loss": 0.464,
      "step": 1280
    },
    {
      "epoch": 1.7530695770804912,
      "grad_norm": 0.2960787022634833,
      "learning_rate": 3.977679272857615e-05,
      "loss": 0.4588,
      "step": 1285
    },
    {
      "epoch": 1.7598908594815825,
      "grad_norm": 0.3108279513492761,
      "learning_rate": 3.969158278975703e-05,
      "loss": 0.4698,
      "step": 1290
    },
    {
      "epoch": 1.766712141882674,
      "grad_norm": 0.2987708195925191,
      "learning_rate": 3.9606124599905805e-05,
      "loss": 0.4629,
      "step": 1295
    },
    {
      "epoch": 1.7735334242837655,
      "grad_norm": 0.3488308740525556,
      "learning_rate": 3.9520419899164383e-05,
      "loss": 0.4544,
      "step": 1300
    },
    {
      "epoch": 1.7803547066848568,
      "grad_norm": 0.32248512138093716,
      "learning_rate": 3.9434470432694206e-05,
      "loss": 0.4568,
      "step": 1305
    },
    {
      "epoch": 1.7871759890859482,
      "grad_norm": 0.2979029518282203,
      "learning_rate": 3.9348277950640785e-05,
      "loss": 0.4543,
      "step": 1310
    },
    {
      "epoch": 1.7939972714870396,
      "grad_norm": 0.2927500842790821,
      "learning_rate": 3.926184420809801e-05,
      "loss": 0.4675,
      "step": 1315
    },
    {
      "epoch": 1.800818553888131,
      "grad_norm": 0.2944797090124265,
      "learning_rate": 3.917517096507245e-05,
      "loss": 0.4604,
      "step": 1320
    },
    {
      "epoch": 1.8076398362892223,
      "grad_norm": 0.2956062921328968,
      "learning_rate": 3.908825998644753e-05,
      "loss": 0.4657,
      "step": 1325
    },
    {
      "epoch": 1.8144611186903137,
      "grad_norm": 0.31291911806828143,
      "learning_rate": 3.90011130419475e-05,
      "loss": 0.457,
      "step": 1330
    },
    {
      "epoch": 1.821282401091405,
      "grad_norm": 0.28055698375775834,
      "learning_rate": 3.891373190610151e-05,
      "loss": 0.4596,
      "step": 1335
    },
    {
      "epoch": 1.8281036834924966,
      "grad_norm": 0.2675001092514469,
      "learning_rate": 3.882611835820743e-05,
      "loss": 0.4502,
      "step": 1340
    },
    {
      "epoch": 1.834924965893588,
      "grad_norm": 0.2771395030669777,
      "learning_rate": 3.87382741822956e-05,
      "loss": 0.4694,
      "step": 1345
    },
    {
      "epoch": 1.8417462482946794,
      "grad_norm": 0.29869108659521465,
      "learning_rate": 3.865020116709253e-05,
      "loss": 0.461,
      "step": 1350
    },
    {
      "epoch": 1.848567530695771,
      "grad_norm": 0.3494382288274053,
      "learning_rate": 3.856190110598446e-05,
      "loss": 0.4622,
      "step": 1355
    },
    {
      "epoch": 1.8553888130968623,
      "grad_norm": 0.352991866875619,
      "learning_rate": 3.8473375796980884e-05,
      "loss": 0.471,
      "step": 1360
    },
    {
      "epoch": 1.8622100954979537,
      "grad_norm": 0.2910222507971248,
      "learning_rate": 3.8384627042677856e-05,
      "loss": 0.4596,
      "step": 1365
    },
    {
      "epoch": 1.869031377899045,
      "grad_norm": 0.2800350442292248,
      "learning_rate": 3.8295656650221365e-05,
      "loss": 0.4653,
      "step": 1370
    },
    {
      "epoch": 1.8758526603001364,
      "grad_norm": 0.29156706940537624,
      "learning_rate": 3.8206466431270506e-05,
      "loss": 0.4605,
      "step": 1375
    },
    {
      "epoch": 1.8826739427012278,
      "grad_norm": 0.2933723081470944,
      "learning_rate": 3.811705820196057e-05,
      "loss": 0.4554,
      "step": 1380
    },
    {
      "epoch": 1.8894952251023192,
      "grad_norm": 0.29832500170699905,
      "learning_rate": 3.8027433782866113e-05,
      "loss": 0.455,
      "step": 1385
    },
    {
      "epoch": 1.8963165075034105,
      "grad_norm": 0.25799082440570675,
      "learning_rate": 3.793759499896382e-05,
      "loss": 0.4486,
      "step": 1390
    },
    {
      "epoch": 1.9031377899045019,
      "grad_norm": 0.3235586101259049,
      "learning_rate": 3.78475436795954e-05,
      "loss": 0.4667,
      "step": 1395
    },
    {
      "epoch": 1.9099590723055935,
      "grad_norm": 0.3074786143159734,
      "learning_rate": 3.775728165843031e-05,
      "loss": 0.4576,
      "step": 1400
    },
    {
      "epoch": 1.9167803547066848,
      "grad_norm": 0.3603064254550076,
      "learning_rate": 3.7666810773428404e-05,
      "loss": 0.4569,
      "step": 1405
    },
    {
      "epoch": 1.9236016371077762,
      "grad_norm": 0.3081793409916903,
      "learning_rate": 3.757613286680256e-05,
      "loss": 0.4632,
      "step": 1410
    },
    {
      "epoch": 1.9304229195088678,
      "grad_norm": 0.3089787023285962,
      "learning_rate": 3.748524978498111e-05,
      "loss": 0.4531,
      "step": 1415
    },
    {
      "epoch": 1.9372442019099592,
      "grad_norm": 0.28472538824854116,
      "learning_rate": 3.739416337857026e-05,
      "loss": 0.4683,
      "step": 1420
    },
    {
      "epoch": 1.9440654843110505,
      "grad_norm": 0.33132010335669787,
      "learning_rate": 3.730287550231643e-05,
      "loss": 0.461,
      "step": 1425
    },
    {
      "epoch": 1.950886766712142,
      "grad_norm": 0.30425617531682264,
      "learning_rate": 3.721138801506844e-05,
      "loss": 0.4596,
      "step": 1430
    },
    {
      "epoch": 1.9577080491132333,
      "grad_norm": 0.33217187574645907,
      "learning_rate": 3.7119702779739725e-05,
      "loss": 0.4653,
      "step": 1435
    },
    {
      "epoch": 1.9645293315143246,
      "grad_norm": 0.31928783726821597,
      "learning_rate": 3.702782166327033e-05,
      "loss": 0.4534,
      "step": 1440
    },
    {
      "epoch": 1.971350613915416,
      "grad_norm": 0.34901830956753394,
      "learning_rate": 3.693574653658894e-05,
      "loss": 0.4541,
      "step": 1445
    },
    {
      "epoch": 1.9781718963165074,
      "grad_norm": 0.31094667952595445,
      "learning_rate": 3.6843479274574786e-05,
      "loss": 0.4493,
      "step": 1450
    },
    {
      "epoch": 1.984993178717599,
      "grad_norm": 0.2979687660697197,
      "learning_rate": 3.6751021756019445e-05,
      "loss": 0.4641,
      "step": 1455
    },
    {
      "epoch": 1.9918144611186903,
      "grad_norm": 0.3084947114843808,
      "learning_rate": 3.665837586358858e-05,
      "loss": 0.4611,
      "step": 1460
    },
    {
      "epoch": 1.9986357435197817,
      "grad_norm": 0.3029857564961375,
      "learning_rate": 3.6565543483783625e-05,
      "loss": 0.4509,
      "step": 1465
    },
    {
      "epoch": 2.0054570259208733,
      "grad_norm": 0.3135726662910769,
      "learning_rate": 3.647252650690337e-05,
      "loss": 0.4236,
      "step": 1470
    },
    {
      "epoch": 2.0122783083219646,
      "grad_norm": 0.31311249123809054,
      "learning_rate": 3.6379326827005446e-05,
      "loss": 0.4171,
      "step": 1475
    },
    {
      "epoch": 2.019099590723056,
      "grad_norm": 0.3129396844691106,
      "learning_rate": 3.628594634186778e-05,
      "loss": 0.4254,
      "step": 1480
    },
    {
      "epoch": 2.0259208731241474,
      "grad_norm": 0.32116328115030246,
      "learning_rate": 3.6192386952949956e-05,
      "loss": 0.42,
      "step": 1485
    },
    {
      "epoch": 2.0327421555252387,
      "grad_norm": 0.3069414900056045,
      "learning_rate": 3.609865056535446e-05,
      "loss": 0.4331,
      "step": 1490
    },
    {
      "epoch": 2.03956343792633,
      "grad_norm": 0.2632126018742137,
      "learning_rate": 3.600473908778795e-05,
      "loss": 0.4225,
      "step": 1495
    },
    {
      "epoch": 2.0463847203274215,
      "grad_norm": 0.32874921996960255,
      "learning_rate": 3.5910654432522307e-05,
      "loss": 0.41,
      "step": 1500
    },
    {
      "epoch": 2.053206002728513,
      "grad_norm": 0.3105130163069014,
      "learning_rate": 3.5816398515355756e-05,
      "loss": 0.4292,
      "step": 1505
    },
    {
      "epoch": 2.060027285129604,
      "grad_norm": 0.32641874471348564,
      "learning_rate": 3.572197325557389e-05,
      "loss": 0.4187,
      "step": 1510
    },
    {
      "epoch": 2.0668485675306956,
      "grad_norm": 0.2941991953873116,
      "learning_rate": 3.5627380575910477e-05,
      "loss": 0.4239,
      "step": 1515
    },
    {
      "epoch": 2.0736698499317874,
      "grad_norm": 0.3182323852409404,
      "learning_rate": 3.5532622402508375e-05,
      "loss": 0.4268,
      "step": 1520
    },
    {
      "epoch": 2.0804911323328787,
      "grad_norm": 0.31245460362116023,
      "learning_rate": 3.5437700664880356e-05,
      "loss": 0.4263,
      "step": 1525
    },
    {
      "epoch": 2.08731241473397,
      "grad_norm": 0.34871682717496116,
      "learning_rate": 3.534261729586974e-05,
      "loss": 0.4193,
      "step": 1530
    },
    {
      "epoch": 2.0941336971350615,
      "grad_norm": 0.37090856535041666,
      "learning_rate": 3.5247374231611035e-05,
      "loss": 0.4246,
      "step": 1535
    },
    {
      "epoch": 2.100954979536153,
      "grad_norm": 0.30717318325796616,
      "learning_rate": 3.515197341149059e-05,
      "loss": 0.4317,
      "step": 1540
    },
    {
      "epoch": 2.107776261937244,
      "grad_norm": 0.31921982872180504,
      "learning_rate": 3.5056416778107046e-05,
      "loss": 0.4207,
      "step": 1545
    },
    {
      "epoch": 2.1145975443383356,
      "grad_norm": 0.2991247847589312,
      "learning_rate": 3.496070627723176e-05,
      "loss": 0.4299,
      "step": 1550
    },
    {
      "epoch": 2.121418826739427,
      "grad_norm": 0.3816219601230735,
      "learning_rate": 3.486484385776925e-05,
      "loss": 0.4359,
      "step": 1555
    },
    {
      "epoch": 2.1282401091405183,
      "grad_norm": 0.30922612207452377,
      "learning_rate": 3.476883147171746e-05,
      "loss": 0.4148,
      "step": 1560
    },
    {
      "epoch": 2.1350613915416097,
      "grad_norm": 0.35605310989928785,
      "learning_rate": 3.467267107412804e-05,
      "loss": 0.4216,
      "step": 1565
    },
    {
      "epoch": 2.141882673942701,
      "grad_norm": 0.31876379646066433,
      "learning_rate": 3.457636462306649e-05,
      "loss": 0.4181,
      "step": 1570
    },
    {
      "epoch": 2.148703956343793,
      "grad_norm": 0.3348502402747506,
      "learning_rate": 3.447991407957238e-05,
      "loss": 0.4258,
      "step": 1575
    },
    {
      "epoch": 2.155525238744884,
      "grad_norm": 0.3103190282044908,
      "learning_rate": 3.43833214076193e-05,
      "loss": 0.4298,
      "step": 1580
    },
    {
      "epoch": 2.1623465211459756,
      "grad_norm": 0.26280179921818947,
      "learning_rate": 3.428658857407498e-05,
      "loss": 0.42,
      "step": 1585
    },
    {
      "epoch": 2.169167803547067,
      "grad_norm": 0.27387018972309857,
      "learning_rate": 3.4189717548661155e-05,
      "loss": 0.4265,
      "step": 1590
    },
    {
      "epoch": 2.1759890859481583,
      "grad_norm": 0.27965256772921515,
      "learning_rate": 3.40927103039135e-05,
      "loss": 0.4306,
      "step": 1595
    },
    {
      "epoch": 2.1828103683492497,
      "grad_norm": 0.354633813195912,
      "learning_rate": 3.3995568815141475e-05,
      "loss": 0.4319,
      "step": 1600
    },
    {
      "epoch": 2.189631650750341,
      "grad_norm": 0.30928248025299715,
      "learning_rate": 3.389829506038806e-05,
      "loss": 0.409,
      "step": 1605
    },
    {
      "epoch": 2.1964529331514324,
      "grad_norm": 0.271879368659074,
      "learning_rate": 3.38008910203895e-05,
      "loss": 0.4242,
      "step": 1610
    },
    {
      "epoch": 2.203274215552524,
      "grad_norm": 0.28795120711169647,
      "learning_rate": 3.3703358678535e-05,
      "loss": 0.428,
      "step": 1615
    },
    {
      "epoch": 2.210095497953615,
      "grad_norm": 0.27920888733605737,
      "learning_rate": 3.360570002082627e-05,
      "loss": 0.4272,
      "step": 1620
    },
    {
      "epoch": 2.2169167803547065,
      "grad_norm": 0.3041918063287464,
      "learning_rate": 3.3507917035837156e-05,
      "loss": 0.4244,
      "step": 1625
    },
    {
      "epoch": 2.223738062755798,
      "grad_norm": 0.2833032542430106,
      "learning_rate": 3.3410011714673116e-05,
      "loss": 0.4264,
      "step": 1630
    },
    {
      "epoch": 2.2305593451568897,
      "grad_norm": 0.30023594828150446,
      "learning_rate": 3.331198605093066e-05,
      "loss": 0.4346,
      "step": 1635
    },
    {
      "epoch": 2.237380627557981,
      "grad_norm": 0.27055036739880434,
      "learning_rate": 3.321384204065679e-05,
      "loss": 0.4231,
      "step": 1640
    },
    {
      "epoch": 2.2442019099590724,
      "grad_norm": 0.2956240648237309,
      "learning_rate": 3.311558168230833e-05,
      "loss": 0.4264,
      "step": 1645
    },
    {
      "epoch": 2.251023192360164,
      "grad_norm": 0.27941390605638816,
      "learning_rate": 3.3017206976711234e-05,
      "loss": 0.4299,
      "step": 1650
    },
    {
      "epoch": 2.257844474761255,
      "grad_norm": 0.2944715963544699,
      "learning_rate": 3.2918719927019874e-05,
      "loss": 0.4253,
      "step": 1655
    },
    {
      "epoch": 2.2646657571623465,
      "grad_norm": 0.26529633059650365,
      "learning_rate": 3.28201225386762e-05,
      "loss": 0.4229,
      "step": 1660
    },
    {
      "epoch": 2.271487039563438,
      "grad_norm": 0.2595003331375696,
      "learning_rate": 3.272141681936896e-05,
      "loss": 0.4127,
      "step": 1665
    },
    {
      "epoch": 2.2783083219645293,
      "grad_norm": 0.282591282398735,
      "learning_rate": 3.262260477899277e-05,
      "loss": 0.4219,
      "step": 1670
    },
    {
      "epoch": 2.2851296043656206,
      "grad_norm": 0.2750339518399228,
      "learning_rate": 3.252368842960722e-05,
      "loss": 0.4292,
      "step": 1675
    },
    {
      "epoch": 2.291950886766712,
      "grad_norm": 0.2618182153631443,
      "learning_rate": 3.242466978539588e-05,
      "loss": 0.4241,
      "step": 1680
    },
    {
      "epoch": 2.2987721691678034,
      "grad_norm": 0.2552841576254521,
      "learning_rate": 3.23255508626253e-05,
      "loss": 0.4222,
      "step": 1685
    },
    {
      "epoch": 2.305593451568895,
      "grad_norm": 0.28582055607185974,
      "learning_rate": 3.222633367960396e-05,
      "loss": 0.428,
      "step": 1690
    },
    {
      "epoch": 2.3124147339699865,
      "grad_norm": 0.28026916468523033,
      "learning_rate": 3.212702025664117e-05,
      "loss": 0.4207,
      "step": 1695
    },
    {
      "epoch": 2.319236016371078,
      "grad_norm": 0.2676116279510356,
      "learning_rate": 3.2027612616005894e-05,
      "loss": 0.415,
      "step": 1700
    },
    {
      "epoch": 2.3260572987721693,
      "grad_norm": 0.2696917737491691,
      "learning_rate": 3.192811278188565e-05,
      "loss": 0.4301,
      "step": 1705
    },
    {
      "epoch": 2.3328785811732606,
      "grad_norm": 0.327138021087197,
      "learning_rate": 3.182852278034519e-05,
      "loss": 0.4307,
      "step": 1710
    },
    {
      "epoch": 2.339699863574352,
      "grad_norm": 0.2907867883983245,
      "learning_rate": 3.172884463928536e-05,
      "loss": 0.4176,
      "step": 1715
    },
    {
      "epoch": 2.3465211459754434,
      "grad_norm": 0.2615366115260166,
      "learning_rate": 3.162908038840168e-05,
      "loss": 0.4193,
      "step": 1720
    },
    {
      "epoch": 2.3533424283765347,
      "grad_norm": 0.25427967252321887,
      "learning_rate": 3.152923205914315e-05,
      "loss": 0.4192,
      "step": 1725
    },
    {
      "epoch": 2.360163710777626,
      "grad_norm": 0.2839198481754373,
      "learning_rate": 3.142930168467076e-05,
      "loss": 0.4193,
      "step": 1730
    },
    {
      "epoch": 2.3669849931787175,
      "grad_norm": 0.27753752053645026,
      "learning_rate": 3.132929129981616e-05,
      "loss": 0.4235,
      "step": 1735
    },
    {
      "epoch": 2.373806275579809,
      "grad_norm": 0.28750808475579537,
      "learning_rate": 3.1229202941040236e-05,
      "loss": 0.4125,
      "step": 1740
    },
    {
      "epoch": 2.3806275579809,
      "grad_norm": 0.27914304459801476,
      "learning_rate": 3.112903864639159e-05,
      "loss": 0.416,
      "step": 1745
    },
    {
      "epoch": 2.3874488403819916,
      "grad_norm": 0.26521947027144227,
      "learning_rate": 3.1028800455465076e-05,
      "loss": 0.4221,
      "step": 1750
    },
    {
      "epoch": 2.3942701227830834,
      "grad_norm": 0.3124062643534349,
      "learning_rate": 3.092849040936026e-05,
      "loss": 0.4245,
      "step": 1755
    },
    {
      "epoch": 2.4010914051841747,
      "grad_norm": 0.2552430115684927,
      "learning_rate": 3.082811055063987e-05,
      "loss": 0.4341,
      "step": 1760
    },
    {
      "epoch": 2.407912687585266,
      "grad_norm": 0.26492843264370497,
      "learning_rate": 3.072766292328816e-05,
      "loss": 0.4327,
      "step": 1765
    },
    {
      "epoch": 2.4147339699863575,
      "grad_norm": 0.2712316410768401,
      "learning_rate": 3.062714957266937e-05,
      "loss": 0.424,
      "step": 1770
    },
    {
      "epoch": 2.421555252387449,
      "grad_norm": 0.27122320185449333,
      "learning_rate": 3.0526572545485996e-05,
      "loss": 0.4261,
      "step": 1775
    },
    {
      "epoch": 2.42837653478854,
      "grad_norm": 0.31474914331449816,
      "learning_rate": 3.0425933889737146e-05,
      "loss": 0.4297,
      "step": 1780
    },
    {
      "epoch": 2.4351978171896316,
      "grad_norm": 0.2919691217077298,
      "learning_rate": 3.032523565467686e-05,
      "loss": 0.4205,
      "step": 1785
    },
    {
      "epoch": 2.442019099590723,
      "grad_norm": 0.27221107044977205,
      "learning_rate": 3.022447989077235e-05,
      "loss": 0.4287,
      "step": 1790
    },
    {
      "epoch": 2.4488403819918143,
      "grad_norm": 0.25942778646026976,
      "learning_rate": 3.012366864966225e-05,
      "loss": 0.4222,
      "step": 1795
    },
    {
      "epoch": 2.4556616643929057,
      "grad_norm": 0.3071419336788923,
      "learning_rate": 3.0022803984114874e-05,
      "loss": 0.4257,
      "step": 1800
    },
    {
      "epoch": 2.4624829467939975,
      "grad_norm": 0.27582616943958793,
      "learning_rate": 2.9921887947986366e-05,
      "loss": 0.418,
      "step": 1805
    },
    {
      "epoch": 2.469304229195089,
      "grad_norm": 0.2671863620869304,
      "learning_rate": 2.9820922596178913e-05,
      "loss": 0.4255,
      "step": 1810
    },
    {
      "epoch": 2.47612551159618,
      "grad_norm": 0.24860100398833726,
      "learning_rate": 2.971990998459889e-05,
      "loss": 0.4156,
      "step": 1815
    },
    {
      "epoch": 2.4829467939972716,
      "grad_norm": 0.284594081004446,
      "learning_rate": 2.961885217011499e-05,
      "loss": 0.4223,
      "step": 1820
    },
    {
      "epoch": 2.489768076398363,
      "grad_norm": 0.2579836114359165,
      "learning_rate": 2.951775121051638e-05,
      "loss": 0.4216,
      "step": 1825
    },
    {
      "epoch": 2.4965893587994543,
      "grad_norm": 0.31508298112234534,
      "learning_rate": 2.9416609164470742e-05,
      "loss": 0.4175,
      "step": 1830
    },
    {
      "epoch": 2.5034106412005457,
      "grad_norm": 0.292088251441105,
      "learning_rate": 2.9315428091482378e-05,
      "loss": 0.4231,
      "step": 1835
    },
    {
      "epoch": 2.510231923601637,
      "grad_norm": 0.2412081187085273,
|
"learning_rate": 2.921421005185028e-05, |
|
"loss": 0.4294, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.5170532060027284, |
|
"grad_norm": 0.2636673784819988, |
|
"learning_rate": 2.9112957106626215e-05, |
|
"loss": 0.42, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 2.52387448840382, |
|
"grad_norm": 0.26621944180879165, |
|
"learning_rate": 2.901167131757264e-05, |
|
"loss": 0.4286, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.530695770804911, |
|
"grad_norm": 0.3122994061435118, |
|
"learning_rate": 2.8910354747120838e-05, |
|
"loss": 0.4294, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 2.5375170532060025, |
|
"grad_norm": 0.28921100624351126, |
|
"learning_rate": 2.88090094583289e-05, |
|
"loss": 0.4188, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.544338335607094, |
|
"grad_norm": 0.2976929836495886, |
|
"learning_rate": 2.8707637514839636e-05, |
|
"loss": 0.4276, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 2.5511596180081857, |
|
"grad_norm": 0.28653245810178596, |
|
"learning_rate": 2.860624098083865e-05, |
|
"loss": 0.4205, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.557980900409277, |
|
"grad_norm": 0.25772469564725126, |
|
"learning_rate": 2.850482192101227e-05, |
|
"loss": 0.4169, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.5648021828103684, |
|
"grad_norm": 0.2755373164575972, |
|
"learning_rate": 2.8403382400505503e-05, |
|
"loss": 0.4224, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.57162346521146, |
|
"grad_norm": 0.26169030436203317, |
|
"learning_rate": 2.8301924484879965e-05, |
|
"loss": 0.428, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 2.578444747612551, |
|
"grad_norm": 0.27151017935464744, |
|
"learning_rate": 2.820045024007188e-05, |
|
"loss": 0.4203, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.5852660300136425, |
|
"grad_norm": 0.28065488272204875, |
|
"learning_rate": 2.8098961732349938e-05, |
|
"loss": 0.4255, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 2.592087312414734, |
|
"grad_norm": 0.24312684897514486, |
|
"learning_rate": 2.799746102827328e-05, |
|
"loss": 0.4297, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.5989085948158253, |
|
"grad_norm": 0.2793981195277466, |
|
"learning_rate": 2.7895950194649396e-05, |
|
"loss": 0.428, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 2.6057298772169166, |
|
"grad_norm": 0.299603171553896, |
|
"learning_rate": 2.779443129849202e-05, |
|
"loss": 0.4248, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.6125511596180084, |
|
"grad_norm": 0.25257119356748714, |
|
"learning_rate": 2.769290640697908e-05, |
|
"loss": 0.4136, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 2.6193724420191, |
|
"grad_norm": 0.26676727856011184, |
|
"learning_rate": 2.759137758741058e-05, |
|
"loss": 0.4177, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.626193724420191, |
|
"grad_norm": 0.2653022309306384, |
|
"learning_rate": 2.74898469071665e-05, |
|
"loss": 0.4214, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.6330150068212825, |
|
"grad_norm": 0.2732195694982371, |
|
"learning_rate": 2.73883164336647e-05, |
|
"loss": 0.4165, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.639836289222374, |
|
"grad_norm": 0.25665300942208025, |
|
"learning_rate": 2.7286788234318873e-05, |
|
"loss": 0.4205, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 2.6466575716234653, |
|
"grad_norm": 0.2744996916924785, |
|
"learning_rate": 2.7185264376496343e-05, |
|
"loss": 0.4335, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.6534788540245566, |
|
"grad_norm": 0.2685342174813685, |
|
"learning_rate": 2.708374692747609e-05, |
|
"loss": 0.4261, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 2.660300136425648, |
|
"grad_norm": 0.25868701017122736, |
|
"learning_rate": 2.698223795440655e-05, |
|
"loss": 0.4126, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.6671214188267394, |
|
"grad_norm": 0.24670171402401134, |
|
"learning_rate": 2.6880739524263577e-05, |
|
"loss": 0.427, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 2.6739427012278307, |
|
"grad_norm": 0.2661683816758132, |
|
"learning_rate": 2.6779253703808354e-05, |
|
"loss": 0.4122, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.680763983628922, |
|
"grad_norm": 0.24338499870433408, |
|
"learning_rate": 2.6677782559545318e-05, |
|
"loss": 0.4276, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 2.6875852660300135, |
|
"grad_norm": 0.2890820143591707, |
|
"learning_rate": 2.657632815768002e-05, |
|
"loss": 0.4243, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.694406548431105, |
|
"grad_norm": 0.2704451519178081, |
|
"learning_rate": 2.647489256407712e-05, |
|
"loss": 0.4172, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.701227830832196, |
|
"grad_norm": 0.2607870631295583, |
|
"learning_rate": 2.6373477844218292e-05, |
|
"loss": 0.4186, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.708049113233288, |
|
"grad_norm": 0.2679123075950831, |
|
"learning_rate": 2.6272086063160174e-05, |
|
"loss": 0.4246, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 2.7148703956343794, |
|
"grad_norm": 0.2510936329742177, |
|
"learning_rate": 2.6170719285492284e-05, |
|
"loss": 0.4176, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.7216916780354707, |
|
"grad_norm": 0.2489113174603498, |
|
"learning_rate": 2.606937957529505e-05, |
|
"loss": 0.4251, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 2.728512960436562, |
|
"grad_norm": 0.2766865540879013, |
|
"learning_rate": 2.5968068996097704e-05, |
|
"loss": 0.4201, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.7353342428376535, |
|
"grad_norm": 0.2619653936916896, |
|
"learning_rate": 2.5866789610836317e-05, |
|
"loss": 0.4319, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 2.742155525238745, |
|
"grad_norm": 0.25836513726725047, |
|
"learning_rate": 2.576554348181178e-05, |
|
"loss": 0.4225, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.748976807639836, |
|
"grad_norm": 0.2812033276566002, |
|
"learning_rate": 2.5664332670647784e-05, |
|
"loss": 0.4105, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 2.7557980900409276, |
|
"grad_norm": 0.26844846621087337, |
|
"learning_rate": 2.5563159238248878e-05, |
|
"loss": 0.4309, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.762619372442019, |
|
"grad_norm": 0.27154792946950407, |
|
"learning_rate": 2.5462025244758464e-05, |
|
"loss": 0.4226, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.7694406548431107, |
|
"grad_norm": 0.23587959371760756, |
|
"learning_rate": 2.536093274951689e-05, |
|
"loss": 0.4214, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.776261937244202, |
|
"grad_norm": 0.24958599275970533, |
|
"learning_rate": 2.5259883811019487e-05, |
|
"loss": 0.426, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 2.7830832196452935, |
|
"grad_norm": 0.256069387441564, |
|
"learning_rate": 2.515888048687467e-05, |
|
"loss": 0.4119, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.789904502046385, |
|
"grad_norm": 0.2743425060857528, |
|
"learning_rate": 2.5057924833762026e-05, |
|
"loss": 0.4235, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 2.796725784447476, |
|
"grad_norm": 0.24792986290013677, |
|
"learning_rate": 2.495701890739044e-05, |
|
"loss": 0.4286, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.8035470668485676, |
|
"grad_norm": 0.2794659297554359, |
|
"learning_rate": 2.4856164762456242e-05, |
|
"loss": 0.4335, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 2.810368349249659, |
|
"grad_norm": 0.24524097260162075, |
|
"learning_rate": 2.4755364452601344e-05, |
|
"loss": 0.416, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.8171896316507503, |
|
"grad_norm": 0.26642107588698416, |
|
"learning_rate": 2.4654620030371468e-05, |
|
"loss": 0.4217, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 2.8240109140518417, |
|
"grad_norm": 0.26001767290569017, |
|
"learning_rate": 2.455393354717431e-05, |
|
"loss": 0.4257, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.830832196452933, |
|
"grad_norm": 0.2468783202942451, |
|
"learning_rate": 2.4453307053237794e-05, |
|
"loss": 0.4134, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.8376534788540244, |
|
"grad_norm": 0.25085358697281507, |
|
"learning_rate": 2.435274259756829e-05, |
|
"loss": 0.4114, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.844474761255116, |
|
"grad_norm": 0.2747138078999621, |
|
"learning_rate": 2.425224222790894e-05, |
|
"loss": 0.427, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 2.851296043656207, |
|
"grad_norm": 0.28121509135866035, |
|
"learning_rate": 2.4151807990697918e-05, |
|
"loss": 0.4191, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.8581173260572985, |
|
"grad_norm": 0.23999527320050015, |
|
"learning_rate": 2.4051441931026798e-05, |
|
"loss": 0.4224, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 2.8649386084583903, |
|
"grad_norm": 0.29784575613259334, |
|
"learning_rate": 2.395114609259885e-05, |
|
"loss": 0.4267, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.8717598908594817, |
|
"grad_norm": 0.26954147028119524, |
|
"learning_rate": 2.3850922517687492e-05, |
|
"loss": 0.4303, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 2.878581173260573, |
|
"grad_norm": 0.25255242413846607, |
|
"learning_rate": 2.3750773247094682e-05, |
|
"loss": 0.426, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.8854024556616644, |
|
"grad_norm": 0.26345645011178265, |
|
"learning_rate": 2.3650700320109343e-05, |
|
"loss": 0.4159, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 2.892223738062756, |
|
"grad_norm": 0.2450797647252615, |
|
"learning_rate": 2.3550705774465858e-05, |
|
"loss": 0.4144, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.899045020463847, |
|
"grad_norm": 0.2960852062725862, |
|
"learning_rate": 2.3450791646302572e-05, |
|
"loss": 0.428, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 2.9058663028649385, |
|
"grad_norm": 0.23784327150753126, |
|
"learning_rate": 2.3350959970120318e-05, |
|
"loss": 0.4245, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.91268758526603, |
|
"grad_norm": 0.2634044450477537, |
|
"learning_rate": 2.3251212778741012e-05, |
|
"loss": 0.4194, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 2.9195088676671213, |
|
"grad_norm": 0.27382767150964016, |
|
"learning_rate": 2.3151552103266234e-05, |
|
"loss": 0.4234, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.926330150068213, |
|
"grad_norm": 0.2882575767927483, |
|
"learning_rate": 2.3051979973035913e-05, |
|
"loss": 0.4161, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 2.9331514324693044, |
|
"grad_norm": 0.29844342531485935, |
|
"learning_rate": 2.295249841558696e-05, |
|
"loss": 0.4232, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.939972714870396, |
|
"grad_norm": 0.26905163387508313, |
|
"learning_rate": 2.2853109456611987e-05, |
|
"loss": 0.4164, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 2.946793997271487, |
|
"grad_norm": 0.2646371089556455, |
|
"learning_rate": 2.2753815119918076e-05, |
|
"loss": 0.4153, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.9536152796725785, |
|
"grad_norm": 0.25665046095413097, |
|
"learning_rate": 2.2654617427385583e-05, |
|
"loss": 0.4222, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 2.96043656207367, |
|
"grad_norm": 0.24014304321760452, |
|
"learning_rate": 2.2555518398926928e-05, |
|
"loss": 0.4153, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.9672578444747613, |
|
"grad_norm": 0.2783657011518547, |
|
"learning_rate": 2.2456520052445484e-05, |
|
"loss": 0.4236, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 2.9740791268758526, |
|
"grad_norm": 0.2362458119689319, |
|
"learning_rate": 2.2357624403794497e-05, |
|
"loss": 0.4181, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.980900409276944, |
|
"grad_norm": 0.2692604610177288, |
|
"learning_rate": 2.2258833466736016e-05, |
|
"loss": 0.4229, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 2.9877216916780354, |
|
"grad_norm": 0.2570622690964156, |
|
"learning_rate": 2.2160149252899913e-05, |
|
"loss": 0.4189, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.9945429740791267, |
|
"grad_norm": 0.23880413998979652, |
|
"learning_rate": 2.206157377174292e-05, |
|
"loss": 0.4215, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 3.001364256480218, |
|
"grad_norm": 0.28400466218366754, |
|
"learning_rate": 2.196310903050767e-05, |
|
"loss": 0.4086, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.00818553888131, |
|
"grad_norm": 0.26762839162616747, |
|
"learning_rate": 2.1864757034181883e-05, |
|
"loss": 0.3902, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 3.0150068212824013, |
|
"grad_norm": 0.25290575577881014, |
|
"learning_rate": 2.176651978545749e-05, |
|
"loss": 0.39, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 3.0218281036834926, |
|
"grad_norm": 0.24735456853320856, |
|
"learning_rate": 2.166839928468988e-05, |
|
"loss": 0.384, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 3.028649386084584, |
|
"grad_norm": 0.2608451564395861, |
|
"learning_rate": 2.1570397529857172e-05, |
|
"loss": 0.3879, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 3.0354706684856754, |
|
"grad_norm": 0.26650939101645865, |
|
"learning_rate": 2.1472516516519524e-05, |
|
"loss": 0.3868, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 3.0422919508867667, |
|
"grad_norm": 0.2589156089671275, |
|
"learning_rate": 2.1374758237778485e-05, |
|
"loss": 0.387, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 3.049113233287858, |
|
"grad_norm": 0.2553292117208548, |
|
"learning_rate": 2.1277124684236416e-05, |
|
"loss": 0.3869, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 3.0559345156889495, |
|
"grad_norm": 0.26167405276695926, |
|
"learning_rate": 2.117961784395599e-05, |
|
"loss": 0.3938, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 3.062755798090041, |
|
"grad_norm": 0.2723361097370056, |
|
"learning_rate": 2.108223970241964e-05, |
|
"loss": 0.39, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 3.069577080491132, |
|
"grad_norm": 0.23691303745295014, |
|
"learning_rate": 2.09849922424892e-05, |
|
"loss": 0.398, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.0763983628922236, |
|
"grad_norm": 0.26384966366753837, |
|
"learning_rate": 2.0887877444365506e-05, |
|
"loss": 0.386, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 3.083219645293315, |
|
"grad_norm": 0.28652063774948267, |
|
"learning_rate": 2.0790897285548044e-05, |
|
"loss": 0.3979, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 3.0900409276944067, |
|
"grad_norm": 0.24726341399079166, |
|
"learning_rate": 2.0694053740794728e-05, |
|
"loss": 0.3877, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 3.096862210095498, |
|
"grad_norm": 0.3147075219833397, |
|
"learning_rate": 2.0597348782081666e-05, |
|
"loss": 0.3926, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 3.1036834924965895, |
|
"grad_norm": 0.27699488401023803, |
|
"learning_rate": 2.0500784378562997e-05, |
|
"loss": 0.3859, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 3.110504774897681, |
|
"grad_norm": 0.2424654454311902, |
|
"learning_rate": 2.0404362496530832e-05, |
|
"loss": 0.3791, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 3.117326057298772, |
|
"grad_norm": 0.24533073896669516, |
|
"learning_rate": 2.030808509937514e-05, |
|
"loss": 0.384, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 3.1241473396998636, |
|
"grad_norm": 0.2710469516457036, |
|
"learning_rate": 2.0211954147543873e-05, |
|
"loss": 0.3841, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 3.130968622100955, |
|
"grad_norm": 0.25722065555572754, |
|
"learning_rate": 2.0115971598502946e-05, |
|
"loss": 0.391, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 3.1377899045020463, |
|
"grad_norm": 0.23265895912306767, |
|
"learning_rate": 2.002013940669647e-05, |
|
"loss": 0.3898, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.1446111869031377, |
|
"grad_norm": 0.25910259700805677, |
|
"learning_rate": 1.992445952350686e-05, |
|
"loss": 0.3801, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 3.151432469304229, |
|
"grad_norm": 0.252711830390526, |
|
"learning_rate": 1.9828933897215173e-05, |
|
"loss": 0.3869, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 3.1582537517053204, |
|
"grad_norm": 0.2527698741289496, |
|
"learning_rate": 1.9733564472961424e-05, |
|
"loss": 0.3907, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 3.1650750341064118, |
|
"grad_norm": 0.2302111532873343, |
|
"learning_rate": 1.9638353192704918e-05, |
|
"loss": 0.393, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 3.1718963165075036, |
|
"grad_norm": 0.25259862180285164, |
|
"learning_rate": 1.9543301995184803e-05, |
|
"loss": 0.3904, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 3.178717598908595, |
|
"grad_norm": 0.25095781768714637, |
|
"learning_rate": 1.9448412815880517e-05, |
|
"loss": 0.3953, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 3.1855388813096863, |
|
"grad_norm": 0.24601622008423973, |
|
"learning_rate": 1.9353687586972408e-05, |
|
"loss": 0.3913, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 3.1923601637107777, |
|
"grad_norm": 0.25295128919530757, |
|
"learning_rate": 1.9259128237302392e-05, |
|
"loss": 0.3898, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 3.199181446111869, |
|
"grad_norm": 0.25707867349735203, |
|
"learning_rate": 1.9164736692334663e-05, |
|
"loss": 0.3986, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 3.2060027285129604, |
|
"grad_norm": 0.2706545466675487, |
|
"learning_rate": 1.9070514874116492e-05, |
|
"loss": 0.3876, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.212824010914052, |
|
"grad_norm": 0.2315776907522079, |
|
"learning_rate": 1.89764647012391e-05, |
|
"loss": 0.3839, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 3.219645293315143, |
|
"grad_norm": 0.23690592753543196, |
|
"learning_rate": 1.8882588088798565e-05, |
|
"loss": 0.386, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 3.2264665757162345, |
|
"grad_norm": 0.256789498241867, |
|
"learning_rate": 1.878888694835685e-05, |
|
"loss": 0.3867, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 3.233287858117326, |
|
"grad_norm": 0.2667167457952798, |
|
"learning_rate": 1.8695363187902864e-05, |
|
"loss": 0.3777, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 3.2401091405184177, |
|
"grad_norm": 0.2717939410865978, |
|
"learning_rate": 1.860201871181364e-05, |
|
"loss": 0.386, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 3.246930422919509, |
|
"grad_norm": 0.24664438475145947, |
|
"learning_rate": 1.8508855420815508e-05, |
|
"loss": 0.3877, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 3.2537517053206004, |
|
"grad_norm": 0.2402057833979888, |
|
"learning_rate": 1.8415875211945434e-05, |
|
"loss": 0.3917, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 3.260572987721692, |
|
"grad_norm": 0.2370683883511882, |
|
"learning_rate": 1.832307997851236e-05, |
|
"loss": 0.3939, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 3.267394270122783, |
|
"grad_norm": 0.23764072931125804, |
|
"learning_rate": 1.8230471610058673e-05, |
|
"loss": 0.3878, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 3.2742155525238745, |
|
"grad_norm": 0.25455084354221097, |
|
"learning_rate": 1.813805199232173e-05, |
|
"loss": 0.3935, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.281036834924966, |
|
"grad_norm": 0.2592711310403459, |
|
"learning_rate": 1.8045823007195456e-05, |
|
"loss": 0.383, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 3.2878581173260573, |
|
"grad_norm": 0.22929056385950908, |
|
"learning_rate": 1.7953786532691996e-05, |
|
"loss": 0.3975, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 3.2946793997271486, |
|
"grad_norm": 0.2463955192715735, |
|
"learning_rate": 1.7861944442903523e-05, |
|
"loss": 0.3881, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 3.30150068212824, |
|
"grad_norm": 0.25423450189038377, |
|
"learning_rate": 1.777029860796406e-05, |
|
"loss": 0.3935, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 3.3083219645293314, |
|
"grad_norm": 0.2510864654388527, |
|
"learning_rate": 1.767885089401135e-05, |
|
"loss": 0.3866, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 3.3151432469304227, |
|
"grad_norm": 0.2386139088635014, |
|
"learning_rate": 1.7587603163148936e-05, |
|
"loss": 0.3812, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 3.321964529331514, |
|
"grad_norm": 0.24126241032577334, |
|
"learning_rate": 1.749655727340819e-05, |
|
"loss": 0.3797, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 3.328785811732606, |
|
"grad_norm": 0.23457981332958217, |
|
"learning_rate": 1.740571507871052e-05, |
|
"loss": 0.4019, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 3.3356070941336973, |
|
"grad_norm": 0.2455529177316154, |
|
"learning_rate": 1.731507842882955e-05, |
|
"loss": 0.3834, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 3.3424283765347886, |
|
"grad_norm": 0.25498588860883886, |
|
"learning_rate": 1.7224649169353547e-05, |
|
"loss": 0.3872, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.34924965893588, |
|
"grad_norm": 0.24993640876977888, |
|
"learning_rate": 1.7134429141647747e-05, |
|
"loss": 0.3896, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 3.3560709413369714, |
|
"grad_norm": 0.2724169506194102, |
|
"learning_rate": 1.704442018281694e-05, |
|
"loss": 0.3939, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 3.3628922237380627, |
|
"grad_norm": 0.24835590557722398, |
|
"learning_rate": 1.695462412566802e-05, |
|
"loss": 0.3918, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 3.369713506139154, |
|
"grad_norm": 0.23729760409009898, |
|
"learning_rate": 1.686504279867267e-05, |
|
"loss": 0.3872, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 3.3765347885402455, |
|
"grad_norm": 0.23242634563284187, |
|
"learning_rate": 1.6775678025930107e-05, |
|
"loss": 0.3894, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 3.383356070941337, |
|
"grad_norm": 0.23937238350849568, |
|
"learning_rate": 1.6686531627130013e-05, |
|
"loss": 0.39, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 3.390177353342428, |
|
"grad_norm": 0.2402399773084803, |
|
"learning_rate": 1.6597605417515376e-05, |
|
"loss": 0.3908, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 3.39699863574352, |
|
"grad_norm": 0.2411687520633103, |
|
"learning_rate": 1.6508901207845622e-05, |
|
"loss": 0.3933, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 3.4038199181446114, |
|
"grad_norm": 0.2399360785924478, |
|
"learning_rate": 1.6420420804359703e-05, |
|
"loss": 0.3815, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 3.4106412005457027, |
|
"grad_norm": 0.23575057738592686, |
|
"learning_rate": 1.6332166008739303e-05, |
|
"loss": 0.3809, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.417462482946794, |
|
"grad_norm": 0.2410072988517726, |
|
"learning_rate": 1.6244138618072162e-05, |
|
"loss": 0.3921, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 3.4242837653478855, |
|
"grad_norm": 0.2627471930720635, |
|
"learning_rate": 1.6156340424815516e-05, |
|
"loss": 0.3887, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 3.431105047748977, |
|
"grad_norm": 0.27721676597358763, |
|
"learning_rate": 1.6068773216759543e-05, |
|
"loss": 0.3861, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 3.437926330150068, |
|
"grad_norm": 0.22524657033432005, |
|
"learning_rate": 1.5981438776990993e-05, |
|
"loss": 0.3915, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 3.4447476125511596, |
|
"grad_norm": 0.25782439333115764, |
|
"learning_rate": 1.589433888385689e-05, |
|
"loss": 0.3812, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 3.451568894952251, |
|
"grad_norm": 0.2534837223232656, |
|
"learning_rate": 1.5807475310928277e-05, |
|
"loss": 0.3819, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 3.4583901773533423, |
|
"grad_norm": 0.2411924331188938, |
|
"learning_rate": 1.572084982696415e-05, |
|
"loss": 0.3875, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 3.4652114597544337, |
|
"grad_norm": 0.23815388889239475, |
|
"learning_rate": 1.5634464195875416e-05, |
|
"loss": 0.3762, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 3.472032742155525, |
|
"grad_norm": 0.2512767672555059, |
|
"learning_rate": 1.5548320176688965e-05, |
|
"loss": 0.3846, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 3.4788540245566164, |
|
"grad_norm": 0.22813572411154703, |
|
"learning_rate": 1.5462419523511872e-05, |
|
"loss": 0.3891, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.485675306957708, |
|
"grad_norm": 0.22750656208279468, |
|
"learning_rate": 1.5376763985495692e-05, |
|
"loss": 0.3791, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 3.4924965893587996, |
|
"grad_norm": 0.22941848560650482, |
|
"learning_rate": 1.529135530680079e-05, |
|
"loss": 0.3855, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 3.499317871759891, |
|
"grad_norm": 0.24186200739975727, |
|
"learning_rate": 1.5206195226560888e-05, |
|
"loss": 0.382, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 3.5061391541609823, |
|
"grad_norm": 0.247880498233121, |
|
"learning_rate": 1.5121285478847625e-05, |
|
"loss": 0.3912, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 3.5129604365620737, |
|
"grad_norm": 0.22121916115206966, |
|
"learning_rate": 1.5036627792635219e-05, |
|
"loss": 0.3851, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 3.519781718963165, |
|
"grad_norm": 0.2541273486285163, |
|
"learning_rate": 1.49522238917653e-05, |
|
"loss": 0.3919, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 3.5266030013642564, |
|
"grad_norm": 0.2219378122067416, |
|
"learning_rate": 1.4868075494911813e-05, |
|
"loss": 0.389, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 3.533424283765348, |
|
"grad_norm": 0.23505397012761625, |
|
"learning_rate": 1.4784184315545968e-05, |
|
"loss": 0.3925, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 3.540245566166439, |
|
"grad_norm": 0.243871234675225, |
|
"learning_rate": 1.4700552061901423e-05, |
|
"loss": 0.3941, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 3.547066848567531, |
|
"grad_norm": 0.2399559284041621, |
|
"learning_rate": 1.4617180436939442e-05, |
|
"loss": 0.3864, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.5538881309686223, |
|
"grad_norm": 0.21831642013948907, |
|
"learning_rate": 1.453407113831424e-05, |
|
"loss": 0.3839, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 3.5607094133697137, |
|
"grad_norm": 0.23659655907063717, |
|
"learning_rate": 1.4451225858338425e-05, |
|
"loss": 0.3858, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 3.567530695770805, |
|
"grad_norm": 0.23969385857421838, |
|
"learning_rate": 1.4368646283948506e-05, |
|
"loss": 0.3853, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 3.5743519781718964, |
|
"grad_norm": 0.2449645484046614, |
|
"learning_rate": 1.4286334096670575e-05, |
|
"loss": 0.3805, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 3.581173260572988, |
|
"grad_norm": 0.24260601540494856, |
|
"learning_rate": 1.4204290972586062e-05, |
|
"loss": 0.3945, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 3.587994542974079, |
|
"grad_norm": 0.23850743724344997, |
|
"learning_rate": 1.41225185822976e-05, |
|
"loss": 0.3902, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 3.5948158253751705, |
|
"grad_norm": 0.2200681993067038, |
|
"learning_rate": 1.404101859089499e-05, |
|
"loss": 0.396, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 3.601637107776262, |
|
"grad_norm": 0.22389866141232462, |
|
"learning_rate": 1.3959792657921322e-05, |
|
"loss": 0.398, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 3.6084583901773533, |
|
"grad_norm": 0.22920576973220413, |
|
"learning_rate": 1.3878842437339184e-05, |
|
"loss": 0.3951, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 3.6152796725784446, |
|
"grad_norm": 0.2342034408316575, |
|
"learning_rate": 1.3798169577496956e-05, |
|
"loss": 0.3871, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.622100954979536, |
|
"grad_norm": 0.23776945264796728, |
|
"learning_rate": 1.3717775721095261e-05, |
|
"loss": 0.3893, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 3.6289222373806274, |
|
"grad_norm": 0.2287623836213206, |
|
"learning_rate": 1.363766250515353e-05, |
|
"loss": 0.3926, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 3.6357435197817187, |
|
"grad_norm": 0.2290706247481116, |
|
"learning_rate": 1.3557831560976642e-05, |
|
"loss": 0.3902, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 3.64256480218281, |
|
"grad_norm": 0.23897064324266662, |
|
"learning_rate": 1.3478284514121717e-05, |
|
"loss": 0.3865, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 3.649386084583902, |
|
"grad_norm": 0.22158513812624758, |
|
"learning_rate": 1.3399022984365042e-05, |
|
"loss": 0.3779, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 3.6562073669849933, |
|
"grad_norm": 0.260366723196768, |
|
"learning_rate": 1.3320048585669028e-05, |
|
"loss": 0.3912, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.6630286493860846, |
|
"grad_norm": 0.2327592062400347, |
|
"learning_rate": 1.3241362926149414e-05, |
|
"loss": 0.3788, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 3.669849931787176, |
|
"grad_norm": 0.274275386301667, |
|
"learning_rate": 1.3162967608042468e-05, |
|
"loss": 0.3834, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 3.6766712141882674, |
|
"grad_norm": 0.2544946567880361, |
|
"learning_rate": 1.3084864227672377e-05, |
|
"loss": 0.3929, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 3.6834924965893587, |
|
"grad_norm": 0.23821594624055176, |
|
"learning_rate": 1.300705437541877e-05, |
|
"loss": 0.3773, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.69031377899045, |
|
"grad_norm": 0.22340565386504316, |
|
"learning_rate": 1.2929539635684309e-05, |
|
"loss": 0.3951, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 3.6971350613915415, |
|
"grad_norm": 0.22585370418113979, |
|
"learning_rate": 1.2852321586862407e-05, |
|
"loss": 0.3864, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.7039563437926333, |
|
"grad_norm": 0.223306643538749, |
|
"learning_rate": 1.277540180130513e-05, |
|
"loss": 0.3896, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 3.7107776261937246, |
|
"grad_norm": 0.24095852480087845, |
|
"learning_rate": 1.2698781845291164e-05, |
|
"loss": 0.3986, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.717598908594816, |
|
"grad_norm": 0.23864727514551046, |
|
"learning_rate": 1.262246327899389e-05, |
|
"loss": 0.3845, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 3.7244201909959074, |
|
"grad_norm": 0.22563526124956518, |
|
"learning_rate": 1.2546447656449668e-05, |
|
"loss": 0.38, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.7312414733969987, |
|
"grad_norm": 0.2129828812682242, |
|
"learning_rate": 1.2470736525526169e-05, |
|
"loss": 0.3925, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 3.73806275579809, |
|
"grad_norm": 0.23462714598689244, |
|
"learning_rate": 1.2395331427890827e-05, |
|
"loss": 0.3917, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.7448840381991815, |
|
"grad_norm": 0.23481258502381297, |
|
"learning_rate": 1.2320233898979512e-05, |
|
"loss": 0.381, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 3.751705320600273, |
|
"grad_norm": 0.23006399815335687, |
|
"learning_rate": 1.2245445467965208e-05, |
|
"loss": 0.388, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.758526603001364, |
|
"grad_norm": 0.2292582900286725, |
|
"learning_rate": 1.2170967657726885e-05, |
|
"loss": 0.3863, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 3.7653478854024556, |
|
"grad_norm": 0.20911004549219808, |
|
"learning_rate": 1.2096801984818528e-05, |
|
"loss": 0.3927, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.772169167803547, |
|
"grad_norm": 0.23285590431049188, |
|
"learning_rate": 1.2022949959438203e-05, |
|
"loss": 0.3934, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 3.7789904502046383, |
|
"grad_norm": 0.22371180041699631, |
|
"learning_rate": 1.1949413085397328e-05, |
|
"loss": 0.3854, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.7858117326057297, |
|
"grad_norm": 0.2296288443937746, |
|
"learning_rate": 1.1876192860090073e-05, |
|
"loss": 0.3971, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 3.792633015006821, |
|
"grad_norm": 0.22350404721125006, |
|
"learning_rate": 1.1803290774462848e-05, |
|
"loss": 0.3896, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.799454297407913, |
|
"grad_norm": 0.21611095868943725, |
|
"learning_rate": 1.1730708312983925e-05, |
|
"loss": 0.3845, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 3.806275579809004, |
|
"grad_norm": 0.22943694058753963, |
|
"learning_rate": 1.1658446953613246e-05, |
|
"loss": 0.3844, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.8130968622100956, |
|
"grad_norm": 0.23106087721823299, |
|
"learning_rate": 1.1586508167772334e-05, |
|
"loss": 0.389, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 3.819918144611187, |
|
"grad_norm": 0.22389625388226372, |
|
"learning_rate": 1.1514893420314252e-05, |
|
"loss": 0.3871, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.8267394270122783, |
|
"grad_norm": 0.2336575786987886, |
|
"learning_rate": 1.1443604169493887e-05, |
|
"loss": 0.3855, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 3.8335607094133697, |
|
"grad_norm": 0.24695715433752222, |
|
"learning_rate": 1.1372641866938197e-05, |
|
"loss": 0.3834, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.840381991814461, |
|
"grad_norm": 0.23095097247032337, |
|
"learning_rate": 1.1302007957616626e-05, |
|
"loss": 0.3868, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 3.8472032742155524, |
|
"grad_norm": 0.2309262382228596, |
|
"learning_rate": 1.123170387981174e-05, |
|
"loss": 0.3842, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.854024556616644, |
|
"grad_norm": 0.22324542558506838, |
|
"learning_rate": 1.116173106508991e-05, |
|
"loss": 0.3874, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 3.8608458390177356, |
|
"grad_norm": 0.2336403968417466, |
|
"learning_rate": 1.1092090938272154e-05, |
|
"loss": 0.3856, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.867667121418827, |
|
"grad_norm": 0.22474941288116115, |
|
"learning_rate": 1.1022784917405146e-05, |
|
"loss": 0.3931, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 3.8744884038199183, |
|
"grad_norm": 0.23021154838376737, |
|
"learning_rate": 1.0953814413732325e-05, |
|
"loss": 0.3913, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.8813096862210097, |
|
"grad_norm": 0.23687546810217178, |
|
"learning_rate": 1.0885180831665148e-05, |
|
"loss": 0.3921, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 3.888130968622101, |
|
"grad_norm": 0.22277814774085422, |
|
"learning_rate": 1.0816885568754533e-05, |
|
"loss": 0.3883, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.8949522510231924, |
|
"grad_norm": 0.2287671628320653, |
|
"learning_rate": 1.074893001566237e-05, |
|
"loss": 0.3859, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 3.901773533424284, |
|
"grad_norm": 0.22176934372528126, |
|
"learning_rate": 1.0681315556133193e-05, |
|
"loss": 0.3848, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.908594815825375, |
|
"grad_norm": 0.24129010351058253, |
|
"learning_rate": 1.0614043566966036e-05, |
|
"loss": 0.3827, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 3.9154160982264665, |
|
"grad_norm": 0.2242584022267207, |
|
"learning_rate": 1.0547115417986394e-05, |
|
"loss": 0.3933, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.922237380627558, |
|
"grad_norm": 0.23752891552617475, |
|
"learning_rate": 1.0480532472018278e-05, |
|
"loss": 0.3909, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 3.9290586630286493, |
|
"grad_norm": 0.21420522853705198, |
|
"learning_rate": 1.041429608485654e-05, |
|
"loss": 0.3884, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.9358799454297406, |
|
"grad_norm": 0.22604709228379252, |
|
"learning_rate": 1.0348407605239225e-05, |
|
"loss": 0.3826, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 3.942701227830832, |
|
"grad_norm": 0.23521675479439724, |
|
"learning_rate": 1.02828683748201e-05, |
|
"loss": 0.3981, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.9495225102319234, |
|
"grad_norm": 0.23258006147829116, |
|
"learning_rate": 1.0217679728141358e-05, |
|
"loss": 0.3889, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 3.956343792633015, |
|
"grad_norm": 0.23352568917122027, |
|
"learning_rate": 1.0152842992606434e-05, |
|
"loss": 0.3791, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.9631650750341065, |
|
"grad_norm": 0.23650015254821877, |
|
"learning_rate": 1.0088359488452965e-05, |
|
"loss": 0.385, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 3.969986357435198, |
|
"grad_norm": 0.21467911019524638, |
|
"learning_rate": 1.0024230528725923e-05, |
|
"loss": 0.3841, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.9768076398362893, |
|
"grad_norm": 0.2118786144254659, |
|
"learning_rate": 9.960457419250868e-06, |
|
"loss": 0.3815, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 3.9836289222373806, |
|
"grad_norm": 0.24661055007603747, |
|
"learning_rate": 9.897041458607355e-06, |
|
"loss": 0.384, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.990450204638472, |
|
"grad_norm": 0.23600245340062118, |
|
"learning_rate": 9.833983938102517e-06, |
|
"loss": 0.3898, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 3.9972714870395634, |
|
"grad_norm": 0.2462434555318692, |
|
"learning_rate": 9.77128614174474e-06, |
|
"loss": 0.3878, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 4.004092769440655, |
|
"grad_norm": 0.22737326101365074, |
|
"learning_rate": 9.708949346217524e-06, |
|
"loss": 0.3721, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 4.0109140518417465, |
|
"grad_norm": 0.2716741438208754, |
|
"learning_rate": 9.6469748208535e-06, |
|
"loss": 0.3653, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 4.017735334242838, |
|
"grad_norm": 0.23957849713614088, |
|
"learning_rate": 9.58536382760858e-06, |
|
"loss": 0.3584, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 4.024556616643929, |
|
"grad_norm": 0.24534646396528859, |
|
"learning_rate": 9.52411762103623e-06, |
|
"loss": 0.3641, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.031377899045021, |
|
"grad_norm": 0.23453648067754057, |
|
"learning_rate": 9.463237448261978e-06, |
|
"loss": 0.3563, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 4.038199181446112, |
|
"grad_norm": 0.2458309245916263, |
|
"learning_rate": 9.402724548957984e-06, |
|
"loss": 0.3525, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 4.045020463847203, |
|
"grad_norm": 0.21575333823040618, |
|
"learning_rate": 9.34258015531779e-06, |
|
"loss": 0.3601, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 4.051841746248295, |
|
"grad_norm": 0.22050939826229063, |
|
"learning_rate": 9.282805492031263e-06, |
|
"loss": 0.3559, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 4.058663028649386, |
|
"grad_norm": 0.2273826757325257, |
|
"learning_rate": 9.22340177625963e-06, |
|
"loss": 0.36, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 4.0654843110504775, |
|
"grad_norm": 0.22114160384151124, |
|
"learning_rate": 9.164370217610695e-06, |
|
"loss": 0.3605, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 4.072305593451569, |
|
"grad_norm": 0.22068975081869985, |
|
"learning_rate": 9.105712018114216e-06, |
|
"loss": 0.3677, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 4.07912687585266, |
|
"grad_norm": 0.21739973803629506, |
|
"learning_rate": 9.047428372197445e-06, |
|
"loss": 0.3604, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 4.085948158253752, |
|
"grad_norm": 0.21792476209759648, |
|
"learning_rate": 8.989520466660758e-06, |
|
"loss": 0.3574, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 4.092769440654843, |
|
"grad_norm": 0.22329035125949306, |
|
"learning_rate": 8.931989480653549e-06, |
|
"loss": 0.3528, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.099590723055934, |
|
"grad_norm": 0.22327717337480657, |
|
"learning_rate": 8.874836585650183e-06, |
|
"loss": 0.3588, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 4.106412005457026, |
|
"grad_norm": 0.2539624450068349, |
|
"learning_rate": 8.81806294542613e-06, |
|
"loss": 0.3658, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 4.113233287858117, |
|
"grad_norm": 0.2279275522447994, |
|
"learning_rate": 8.761669716034316e-06, |
|
"loss": 0.3657, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 4.120054570259208, |
|
"grad_norm": 0.21624233022403727, |
|
"learning_rate": 8.705658045781535e-06, |
|
"loss": 0.3652, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 4.1268758526603, |
|
"grad_norm": 0.22338819767620166, |
|
"learning_rate": 8.65002907520508e-06, |
|
"loss": 0.3554, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 4.133697135061391, |
|
"grad_norm": 0.21933927707300271, |
|
"learning_rate": 8.594783937049542e-06, |
|
"loss": 0.3646, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 4.1405184174624825, |
|
"grad_norm": 0.21745048274348827, |
|
"learning_rate": 8.539923756243726e-06, |
|
"loss": 0.3612, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 4.147339699863575, |
|
"grad_norm": 0.22215248408199215, |
|
"learning_rate": 8.485449649877719e-06, |
|
"loss": 0.3617, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 4.154160982264666, |
|
"grad_norm": 0.21830220010908438, |
|
"learning_rate": 8.431362727180202e-06, |
|
"loss": 0.3653, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 4.1609822646657575, |
|
"grad_norm": 0.2172278500786131, |
|
"learning_rate": 8.377664089495818e-06, |
|
"loss": 0.3586, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.167803547066849, |
|
"grad_norm": 0.22322562699694812, |
|
"learning_rate": 8.32435483026275e-06, |
|
"loss": 0.366, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 4.17462482946794, |
|
"grad_norm": 0.22127876018606277, |
|
"learning_rate": 8.271436034990476e-06, |
|
"loss": 0.3552, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 4.181446111869032, |
|
"grad_norm": 0.2285771675307399, |
|
"learning_rate": 8.21890878123765e-06, |
|
"loss": 0.3601, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 4.188267394270123, |
|
"grad_norm": 0.2245833117692198, |
|
"learning_rate": 8.16677413859016e-06, |
|
"loss": 0.3547, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 4.195088676671214, |
|
"grad_norm": 0.2274333858726854, |
|
"learning_rate": 8.115033168639362e-06, |
|
"loss": 0.3668, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 4.201909959072306, |
|
"grad_norm": 0.2277027755491278, |
|
"learning_rate": 8.063686924960451e-06, |
|
"loss": 0.3656, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 4.208731241473397, |
|
"grad_norm": 0.2217198628514234, |
|
"learning_rate": 8.012736453091002e-06, |
|
"loss": 0.3638, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 4.215552523874488, |
|
"grad_norm": 0.224080006759543, |
|
"learning_rate": 7.962182790509706e-06, |
|
"loss": 0.3638, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 4.22237380627558, |
|
"grad_norm": 0.21533038392555934, |
|
"learning_rate": 7.912026966615206e-06, |
|
"loss": 0.367, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 4.229195088676671, |
|
"grad_norm": 0.21835647709359632, |
|
"learning_rate": 7.862270002705168e-06, |
|
"loss": 0.3592, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.2360163710777625, |
|
"grad_norm": 0.22759755664959258, |
|
"learning_rate": 7.81291291195548e-06, |
|
"loss": 0.3582, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 4.242837653478854, |
|
"grad_norm": 0.21341514951715657, |
|
"learning_rate": 7.763956699399613e-06, |
|
"loss": 0.369, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 4.249658935879945, |
|
"grad_norm": 0.21830086158415568, |
|
"learning_rate": 7.71540236190814e-06, |
|
"loss": 0.3703, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 4.256480218281037, |
|
"grad_norm": 0.22265158567097876, |
|
"learning_rate": 7.667250888168484e-06, |
|
"loss": 0.3569, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 4.263301500682128, |
|
"grad_norm": 0.22227322272741737, |
|
"learning_rate": 7.619503258664734e-06, |
|
"loss": 0.3579, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 4.270122783083219, |
|
"grad_norm": 0.2210961648085674, |
|
"learning_rate": 7.5721604456577165e-06, |
|
"loss": 0.3549, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 4.276944065484311, |
|
"grad_norm": 0.23040654515127004, |
|
"learning_rate": 7.525223413165174e-06, |
|
"loss": 0.3585, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 4.283765347885402, |
|
"grad_norm": 0.22824816791771613, |
|
"learning_rate": 7.478693116942159e-06, |
|
"loss": 0.361, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 4.2905866302864935, |
|
"grad_norm": 0.2169829907264743, |
|
"learning_rate": 7.432570504461546e-06, |
|
"loss": 0.3669, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 4.297407912687586, |
|
"grad_norm": 0.21829402625134559, |
|
"learning_rate": 7.386856514894759e-06, |
|
"loss": 0.3635, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.304229195088677, |
|
"grad_norm": 0.21652462215311938, |
|
"learning_rate": 7.341552079092644e-06, |
|
"loss": 0.3625, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 4.311050477489768, |
|
"grad_norm": 0.21101625882615488, |
|
"learning_rate": 7.296658119566495e-06, |
|
"loss": 0.3588, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 4.31787175989086, |
|
"grad_norm": 0.21735045950549228, |
|
"learning_rate": 7.252175550469309e-06, |
|
"loss": 0.3686, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 4.324693042291951, |
|
"grad_norm": 0.21565787043013268, |
|
"learning_rate": 7.20810527757713e-06, |
|
"loss": 0.3671, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 4.3315143246930425, |
|
"grad_norm": 0.22421139524596934, |
|
"learning_rate": 7.164448198270618e-06, |
|
"loss": 0.3526, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 4.338335607094134, |
|
"grad_norm": 0.22134931294096435, |
|
"learning_rate": 7.121205201516804e-06, |
|
"loss": 0.3567, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 4.345156889495225, |
|
"grad_norm": 0.22666795543953605, |
|
"learning_rate": 7.0783771678509485e-06, |
|
"loss": 0.3726, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 4.351978171896317, |
|
"grad_norm": 0.21253153167501385, |
|
"learning_rate": 7.035964969358627e-06, |
|
"loss": 0.3613, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 4.358799454297408, |
|
"grad_norm": 0.21642915334480572, |
|
"learning_rate": 6.993969469657991e-06, |
|
"loss": 0.3621, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 4.365620736698499, |
|
"grad_norm": 0.21985376534741252, |
|
"learning_rate": 6.952391523882136e-06, |
|
"loss": 0.3644, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.372442019099591, |
|
"grad_norm": 0.20818031870682635, |
|
"learning_rate": 6.911231978661756e-06, |
|
"loss": 0.3577, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 4.379263301500682, |
|
"grad_norm": 0.21797401592355672, |
|
"learning_rate": 6.870491672107829e-06, |
|
"loss": 0.3606, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 4.3860845839017735, |
|
"grad_norm": 0.2223959557046167, |
|
"learning_rate": 6.830171433794615e-06, |
|
"loss": 0.3614, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 4.392905866302865, |
|
"grad_norm": 0.21662605179762412, |
|
"learning_rate": 6.79027208474272e-06, |
|
"loss": 0.3619, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.399727148703956, |
|
"grad_norm": 0.21827160013518693, |
|
"learning_rate": 6.750794437402409e-06, |
|
"loss": 0.3643, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 4.406548431105048, |
|
"grad_norm": 0.2094602127422858, |
|
"learning_rate": 6.711739295637037e-06, |
|
"loss": 0.3665, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 4.413369713506139, |
|
"grad_norm": 0.21671970848566585, |
|
"learning_rate": 6.673107454706698e-06, |
|
"loss": 0.3556, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 4.42019099590723, |
|
"grad_norm": 0.21946450945142332, |
|
"learning_rate": 6.634899701252023e-06, |
|
"loss": 0.3584, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 4.427012278308322, |
|
"grad_norm": 0.21520753526661168, |
|
"learning_rate": 6.597116813278165e-06, |
|
"loss": 0.3587, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 4.433833560709413, |
|
"grad_norm": 0.21879280857490085, |
|
"learning_rate": 6.559759560138951e-06, |
|
"loss": 0.3738, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.440654843110504, |
|
"grad_norm": 0.21443348635512918, |
|
"learning_rate": 6.522828702521229e-06, |
|
"loss": 0.3681, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 4.447476125511596, |
|
"grad_norm": 0.21531181663268836, |
|
"learning_rate": 6.486324992429374e-06, |
|
"loss": 0.3586, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 4.454297407912687, |
|
"grad_norm": 0.22730777162459695, |
|
"learning_rate": 6.450249173169957e-06, |
|
"loss": 0.3647, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 4.461118690313779, |
|
"grad_norm": 0.2186572367289963, |
|
"learning_rate": 6.414601979336641e-06, |
|
"loss": 0.3663, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 4.467939972714871, |
|
"grad_norm": 0.205548359460269, |
|
"learning_rate": 6.379384136795187e-06, |
|
"loss": 0.3652, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 4.474761255115962, |
|
"grad_norm": 0.21275260150841085, |
|
"learning_rate": 6.344596362668717e-06, |
|
"loss": 0.3567, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 4.4815825375170535, |
|
"grad_norm": 0.22394364468614514, |
|
"learning_rate": 6.310239365323067e-06, |
|
"loss": 0.3568, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 4.488403819918145, |
|
"grad_norm": 0.21614224486754255, |
|
"learning_rate": 6.276313844352398e-06, |
|
"loss": 0.3674, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 4.495225102319236, |
|
"grad_norm": 0.22717780466240847, |
|
"learning_rate": 6.242820490564919e-06, |
|
"loss": 0.3579, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 4.502046384720328, |
|
"grad_norm": 0.2194581741821536, |
|
"learning_rate": 6.209759985968859e-06, |
|
"loss": 0.3586, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.508867667121419, |
|
"grad_norm": 0.21205274286854864, |
|
"learning_rate": 6.177133003758534e-06, |
|
"loss": 0.3639, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 4.51568894952251, |
|
"grad_norm": 0.20169862390766227, |
|
"learning_rate": 6.144940208300686e-06, |
|
"loss": 0.3645, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 4.522510231923602, |
|
"grad_norm": 0.2307839103232325, |
|
"learning_rate": 6.113182255120918e-06, |
|
"loss": 0.3612, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 4.529331514324693, |
|
"grad_norm": 0.24589136143206128, |
|
"learning_rate": 6.081859790890362e-06, |
|
"loss": 0.3637, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 4.536152796725784, |
|
"grad_norm": 0.21286529915712976, |
|
"learning_rate": 6.050973453412505e-06, |
|
"loss": 0.3662, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 4.542974079126876, |
|
"grad_norm": 0.21707616385137538, |
|
"learning_rate": 6.02052387161022e-06, |
|
"loss": 0.3593, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 4.549795361527967, |
|
"grad_norm": 0.21842536440351526, |
|
"learning_rate": 5.990511665512928e-06, |
|
"loss": 0.3721, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 4.5566166439290585, |
|
"grad_norm": 0.21294660571272483, |
|
"learning_rate": 5.9609374462439985e-06, |
|
"loss": 0.3676, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 4.56343792633015, |
|
"grad_norm": 0.22337040238839628, |
|
"learning_rate": 5.931801816008301e-06, |
|
"loss": 0.3684, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 4.570259208731241, |
|
"grad_norm": 0.2161036036245841, |
|
"learning_rate": 5.903105368079925e-06, |
|
"loss": 0.3758, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.577080491132333, |
|
"grad_norm": 0.21231866353419215, |
|
"learning_rate": 5.874848686790128e-06, |
|
"loss": 0.3589, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 4.583901773533424, |
|
"grad_norm": 0.21210490903696586, |
|
"learning_rate": 5.84703234751541e-06, |
|
"loss": 0.3627, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 4.590723055934515, |
|
"grad_norm": 0.23088940972859365, |
|
"learning_rate": 5.819656916665815e-06, |
|
"loss": 0.3683, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 4.597544338335607, |
|
"grad_norm": 0.22016010914864348, |
|
"learning_rate": 5.792722951673392e-06, |
|
"loss": 0.3685, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 4.604365620736699, |
|
"grad_norm": 0.214327414487605, |
|
"learning_rate": 5.766231000980844e-06, |
|
"loss": 0.3656, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 4.61118690313779, |
|
"grad_norm": 0.22682397661714535, |
|
"learning_rate": 5.740181604030356e-06, |
|
"loss": 0.3673, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 4.618008185538882, |
|
"grad_norm": 0.20115490800552044, |
|
"learning_rate": 5.7145752912526205e-06, |
|
"loss": 0.3509, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 4.624829467939973, |
|
"grad_norm": 0.2147637290224296, |
|
"learning_rate": 5.689412584056033e-06, |
|
"loss": 0.365, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 4.631650750341064, |
|
"grad_norm": 0.22835409215980654, |
|
"learning_rate": 5.664693994816064e-06, |
|
"loss": 0.3636, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 4.638472032742156, |
|
"grad_norm": 0.23181824569276913, |
|
"learning_rate": 5.640420026864841e-06, |
|
"loss": 0.3577, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.645293315143247, |
|
"grad_norm": 0.21273142507245463, |
|
"learning_rate": 5.616591174480892e-06, |
|
"loss": 0.3754, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 4.6521145975443385, |
|
"grad_norm": 0.23412233563353524, |
|
"learning_rate": 5.593207922879085e-06, |
|
"loss": 0.3635, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 4.65893587994543, |
|
"grad_norm": 0.23805419714394244, |
|
"learning_rate": 5.5702707482007375e-06, |
|
"loss": 0.3602, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 4.665757162346521, |
|
"grad_norm": 0.22195919424198418, |
|
"learning_rate": 5.547780117503936e-06, |
|
"loss": 0.3615, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 4.672578444747613, |
|
"grad_norm": 0.21560156330796026, |
|
"learning_rate": 5.525736488754013e-06, |
|
"loss": 0.3632, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 4.679399727148704, |
|
"grad_norm": 0.2128851675688275, |
|
"learning_rate": 5.504140310814227e-06, |
|
"loss": 0.3712, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 4.686221009549795, |
|
"grad_norm": 0.21823521503347237, |
|
"learning_rate": 5.482992023436628e-06, |
|
"loss": 0.3626, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 4.693042291950887, |
|
"grad_norm": 0.21111278578924456, |
|
"learning_rate": 5.462292057253084e-06, |
|
"loss": 0.3687, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 4.699863574351978, |
|
"grad_norm": 0.2277150652115561, |
|
"learning_rate": 5.442040833766537e-06, |
|
"loss": 0.3646, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 4.7066848567530695, |
|
"grad_norm": 0.22645677225238678, |
|
"learning_rate": 5.422238765342407e-06, |
|
"loss": 0.3683, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.713506139154161, |
|
"grad_norm": 0.2351028025182847, |
|
"learning_rate": 5.402886255200191e-06, |
|
"loss": 0.3666, |
|
"step": 3455 |
|
}, |
|
    {
      "epoch": 4.720327421555252,
      "grad_norm": 0.2274769392892358,
      "learning_rate": 5.383983697405264e-06,
      "loss": 0.4029,
      "step": 3460
    },
    {
      "epoch": 4.727148703956344,
      "grad_norm": 0.22256597899239655,
      "learning_rate": 5.36553147686085e-06,
      "loss": 0.3585,
      "step": 3465
    },
    {
      "epoch": 4.733969986357435,
      "grad_norm": 0.22712154690395114,
      "learning_rate": 5.3475299693001705e-06,
      "loss": 0.3637,
      "step": 3470
    },
    {
      "epoch": 4.740791268758526,
      "grad_norm": 0.2341092963683226,
      "learning_rate": 5.329979541278825e-06,
      "loss": 0.3593,
      "step": 3475
    },
    {
      "epoch": 4.747612551159618,
      "grad_norm": 0.21942571969743654,
      "learning_rate": 5.312880550167298e-06,
      "loss": 0.3702,
      "step": 3480
    },
    {
      "epoch": 4.754433833560709,
      "grad_norm": 0.21824512110145416,
      "learning_rate": 5.296233344143691e-06,
      "loss": 0.3652,
      "step": 3485
    },
    {
      "epoch": 4.7612551159618,
      "grad_norm": 0.2228367756186903,
      "learning_rate": 5.28003826218664e-06,
      "loss": 0.3576,
      "step": 3490
    },
    {
      "epoch": 4.768076398362892,
      "grad_norm": 0.206323937884375,
      "learning_rate": 5.264295634068407e-06,
      "loss": 0.3764,
      "step": 3495
    },
    {
      "epoch": 4.774897680763983,
      "grad_norm": 0.219043701650007,
      "learning_rate": 5.249005780348163e-06,
      "loss": 0.3629,
      "step": 3500
    },
    {
      "epoch": 4.781718963165075,
      "grad_norm": 0.2065583178371043,
      "learning_rate": 5.234169012365458e-06,
      "loss": 0.3674,
      "step": 3505
    },
    {
      "epoch": 4.788540245566167,
      "grad_norm": 0.21803424570263244,
      "learning_rate": 5.2197856322339e-06,
      "loss": 0.3706,
      "step": 3510
    },
    {
      "epoch": 4.795361527967258,
      "grad_norm": 0.19896410177188684,
      "learning_rate": 5.205855932834974e-06,
      "loss": 0.3563,
      "step": 3515
    },
    {
      "epoch": 4.8021828103683495,
      "grad_norm": 0.21096568003167546,
      "learning_rate": 5.192380197812105e-06,
      "loss": 0.3646,
      "step": 3520
    },
    {
      "epoch": 4.809004092769441,
      "grad_norm": 0.20895896960294863,
      "learning_rate": 5.1793587015648676e-06,
      "loss": 0.3668,
      "step": 3525
    },
    {
      "epoch": 4.815825375170532,
      "grad_norm": 0.21356363495231379,
      "learning_rate": 5.1667917092434e-06,
      "loss": 0.3606,
      "step": 3530
    },
    {
      "epoch": 4.822646657571624,
      "grad_norm": 0.2280566078765369,
      "learning_rate": 5.154679476743011e-06,
      "loss": 0.3729,
      "step": 3535
    },
    {
      "epoch": 4.829467939972715,
      "grad_norm": 0.22121541945426695,
      "learning_rate": 5.143022250698964e-06,
      "loss": 0.3587,
      "step": 3540
    },
    {
      "epoch": 4.836289222373806,
      "grad_norm": 0.2170468593712326,
      "learning_rate": 5.1318202684814476e-06,
      "loss": 0.3609,
      "step": 3545
    },
    {
      "epoch": 4.843110504774898,
      "grad_norm": 0.21645996535263878,
      "learning_rate": 5.121073758190766e-06,
      "loss": 0.3753,
      "step": 3550
    },
    {
      "epoch": 4.849931787175989,
      "grad_norm": 0.20281049437817825,
      "learning_rate": 5.110782938652669e-06,
      "loss": 0.3675,
      "step": 3555
    },
    {
      "epoch": 4.85675306957708,
      "grad_norm": 0.21795270522334995,
      "learning_rate": 5.100948019413905e-06,
      "loss": 0.3629,
      "step": 3560
    },
    {
      "epoch": 4.863574351978172,
      "grad_norm": 0.2332969083911167,
      "learning_rate": 5.091569200737963e-06,
      "loss": 0.3662,
      "step": 3565
    },
    {
      "epoch": 4.870395634379263,
      "grad_norm": 0.22343458114543863,
      "learning_rate": 5.082646673600981e-06,
      "loss": 0.361,
      "step": 3570
    },
    {
      "epoch": 4.8772169167803545,
      "grad_norm": 0.23104806486326432,
      "learning_rate": 5.074180619687862e-06,
      "loss": 0.3683,
      "step": 3575
    },
    {
      "epoch": 4.884038199181446,
      "grad_norm": 0.2103538496331855,
      "learning_rate": 5.066171211388582e-06,
      "loss": 0.3587,
      "step": 3580
    },
    {
      "epoch": 4.890859481582537,
      "grad_norm": 0.2260978460150931,
      "learning_rate": 5.05861861179467e-06,
      "loss": 0.3717,
      "step": 3585
    },
    {
      "epoch": 4.897680763983629,
      "grad_norm": 0.21658341303568335,
      "learning_rate": 5.051522974695889e-06,
      "loss": 0.3663,
      "step": 3590
    },
    {
      "epoch": 4.90450204638472,
      "grad_norm": 0.2160133005935788,
      "learning_rate": 5.044884444577105e-06,
      "loss": 0.3701,
      "step": 3595
    },
    {
      "epoch": 4.911323328785811,
      "grad_norm": 0.22687277607733605,
      "learning_rate": 5.038703156615354e-06,
      "loss": 0.3685,
      "step": 3600
    },
    {
      "epoch": 4.918144611186904,
      "grad_norm": 0.22565159385590897,
      "learning_rate": 5.0329792366770686e-06,
      "loss": 0.3682,
      "step": 3605
    },
    {
      "epoch": 4.924965893587995,
      "grad_norm": 0.21363842038305725,
      "learning_rate": 5.0277128013155404e-06,
      "loss": 0.3647,
      "step": 3610
    },
    {
      "epoch": 4.931787175989086,
      "grad_norm": 0.21645228300911193,
      "learning_rate": 5.022903957768524e-06,
      "loss": 0.3583,
      "step": 3615
    },
    {
      "epoch": 4.938608458390178,
      "grad_norm": 0.2093479992601062,
      "learning_rate": 5.0185528039560695e-06,
      "loss": 0.3641,
      "step": 3620
    },
    {
      "epoch": 4.945429740791269,
      "grad_norm": 0.21505943439715722,
      "learning_rate": 5.01465942847852e-06,
      "loss": 0.3662,
      "step": 3625
    },
    {
      "epoch": 4.95225102319236,
      "grad_norm": 0.21629968585480175,
      "learning_rate": 5.01122391061471e-06,
      "loss": 0.3666,
      "step": 3630
    },
    {
      "epoch": 4.959072305593452,
      "grad_norm": 0.21294059754853326,
      "learning_rate": 5.008246320320353e-06,
      "loss": 0.3631,
      "step": 3635
    },
    {
      "epoch": 4.965893587994543,
      "grad_norm": 0.20559301512458342,
      "learning_rate": 5.005726718226612e-06,
      "loss": 0.3567,
      "step": 3640
    },
    {
      "epoch": 4.9727148703956345,
      "grad_norm": 0.21200057872237174,
      "learning_rate": 5.003665155638871e-06,
      "loss": 0.3567,
      "step": 3645
    },
    {
      "epoch": 4.979536152796726,
      "grad_norm": 0.21168073451110256,
      "learning_rate": 5.002061674535687e-06,
      "loss": 0.3642,
      "step": 3650
    },
    {
      "epoch": 4.986357435197817,
      "grad_norm": 0.22189263802123968,
      "learning_rate": 5.00091630756793e-06,
      "loss": 0.3646,
      "step": 3655
    },
    {
      "epoch": 4.993178717598909,
      "grad_norm": 0.21534576409298245,
      "learning_rate": 5.0002290780581325e-06,
      "loss": 0.3683,
      "step": 3660
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.21118277872368893,
      "learning_rate": 5e-06,
      "loss": 0.3618,
      "step": 3665
    },
    {
      "epoch": 5.0,
      "step": 3665,
      "total_flos": 5731176614461440.0,
      "train_loss": 0.4375836861247418,
      "train_runtime": 77494.5958,
      "train_samples_per_second": 6.048,
      "train_steps_per_second": 0.047
    }
  ],
  "logging_steps": 5,
  "max_steps": 3665,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5731176614461440.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
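
The object above appears to be the trainer_state.json that the Hugging Face transformers Trainer writes at the end of a run (here: 5 epochs, 3665 steps, logging every 5 steps). A minimal sketch of how one might load it and plot the loss curve follows; the filename/path and the use of matplotlib are assumptions for illustration, not part of the original run.

import json

import matplotlib.pyplot as plt

# Path is an assumption; the Trainer writes this file into its
# output / checkpoint directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry a "loss" key; the final aggregate record
# carries "train_loss"/"train_runtime" instead, so it is skipped here.
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"{state['num_train_epochs']} epochs, {state['max_steps']} steps")
plt.show()

With logging_steps at 5 and max_steps at 3665, this yields 733 points; the curve should flatten toward the ~0.36 losses logged in the final epoch above, against the overall average train_loss of 0.4376.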