{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.597084548104956, "eval_steps": 500, "global_step": 960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014334542410714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 600.2800903320312, "completions/mean_terminated_length": 549.4417724609375, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.009329446064139942, "grad_norm": 0.15390798449516296, "learning_rate": 1e-06, "loss": 0.0326, "num_tokens": 36741188.0, "reward": 0.4951869547367096, "reward_std": 0.26015907526016235, "rewards/simpleverify_reward/mean": 0.4951869547367096, "rewards/simpleverify_reward/std": 0.49998119473457336, "step": 1 }, { "clip_ratio/high_max": 0.002040916406258475, "clip_ratio/high_mean": 0.0008884637609298807, "clip_ratio/low_mean": 0.000776253073127009, "clip_ratio/low_min": 9.176790354104014e-05, "clip_ratio/region_mean": 0.001664716844970826, "epoch": 0.018658892128279883, "grad_norm": 0.16035835444927216, "learning_rate": 1e-06, "loss": 0.0402, "step": 2 }, { "clip_ratio/high_max": 0.0026521540712565184, "clip_ratio/high_mean": 0.0011930029759241734, "clip_ratio/low_mean": 0.0007639474333700491, "clip_ratio/low_min": 5.708100434276275e-05, "clip_ratio/region_mean": 0.0019569503856473602, "epoch": 0.027988338192419825, "grad_norm": 0.13767586648464203, "learning_rate": 1e-06, "loss": -0.0356, "step": 3 }, { "clip_ratio/high_max": 0.0025948614711523987, "clip_ratio/high_mean": 0.0011277109006186947, "clip_ratio/low_mean": 0.0007378015725407749, "clip_ratio/low_min": 0.00012079647240170743, "clip_ratio/region_mean": 0.0018655124586075544, "epoch": 0.037317784256559766, "grad_norm": 0.12275255471467972, "learning_rate": 1e-06, "loss": -0.0086, "step": 4 }, { "clip_ratio/high_max": 0.002557778687332757, "clip_ratio/high_mean": 0.001121524917834904, "clip_ratio/low_mean": 0.0008878749722498469, "clip_ratio/low_min": 0.00013076019331492716, "clip_ratio/region_mean": 0.002009399853704963, "epoch": 0.04664723032069971, "grad_norm": 0.1217966079711914, "learning_rate": 1e-06, "loss": 0.0253, "step": 5 }, { "clip_ratio/high_max": 0.0031994826713344082, "clip_ratio/high_mean": 0.0015321520622819662, "clip_ratio/low_mean": 0.0011226907390664564, "clip_ratio/low_min": 8.586031071899924e-05, "clip_ratio/region_mean": 0.002654842821357306, "epoch": 0.05597667638483965, "grad_norm": 0.1310003250837326, "learning_rate": 1e-06, "loss": -0.0679, "step": 6 }, { "clip_ratio/high_max": 0.00303863592853304, "clip_ratio/high_mean": 0.001447087044653017, "clip_ratio/low_mean": 0.001268966148927575, "clip_ratio/low_min": 0.0001895853083624388, "clip_ratio/region_mean": 0.0027160532044945285, "epoch": 0.0653061224489796, "grad_norm": 0.13644307851791382, "learning_rate": 1e-06, "loss": -0.0298, "step": 7 }, { "clip_ratio/high_max": 0.002758739014097955, "clip_ratio/high_mean": 0.001347529236227274, "clip_ratio/low_mean": 0.0014581874274881557, "clip_ratio/low_min": 0.00022467970302386675, "clip_ratio/region_mean": 0.0028057167073711753, "epoch": 0.07463556851311953, "grad_norm": 0.14762556552886963, "learning_rate": 1e-06, "loss": -0.0001, "step": 8 }, { "clip_ratio/high_max": 0.0028129097700002603, "clip_ratio/high_mean": 0.0013104464815114625, "clip_ratio/low_mean": 0.0014768756373086944, "clip_ratio/low_min": 0.00033178317517013056, "clip_ratio/region_mean": 0.002787322082440369, "epoch": 0.08396501457725948, "grad_norm": 0.13718098402023315, "learning_rate": 1e-06, "loss": 0.0285, "step": 9 }, { "clip_ratio/high_max": 0.002485598270141054, "clip_ratio/high_mean": 0.001291550697715138, "clip_ratio/low_mean": 0.0014255444948503282, "clip_ratio/low_min": 0.00017188102083309786, "clip_ratio/region_mean": 0.002717095223488286, "epoch": 0.09329446064139942, "grad_norm": 0.12989400327205658, "learning_rate": 1e-06, "loss": -0.0213, "step": 10 }, { "clip_ratio/high_max": 0.0028909092070534825, "clip_ratio/high_mean": 0.001177898364403518, "clip_ratio/low_mean": 0.0013606542852357961, "clip_ratio/low_min": 0.00022655712837149622, "clip_ratio/region_mean": 0.002538552609621547, "epoch": 0.10262390670553936, "grad_norm": 0.13184423744678497, "learning_rate": 1e-06, "loss": -0.0268, "step": 11 }, { "clip_ratio/high_max": 0.002287557836098131, "clip_ratio/high_mean": 0.0010311130045010941, "clip_ratio/low_mean": 0.0012395995436236262, "clip_ratio/low_min": 0.00029447417091432726, "clip_ratio/region_mean": 0.002270712524477858, "epoch": 0.1119533527696793, "grad_norm": 0.12579387426376343, "learning_rate": 1e-06, "loss": 0.0185, "step": 12 }, { "clip_ratio/high_max": 0.0024642881980980746, "clip_ratio/high_mean": 0.00115407105840859, "clip_ratio/low_mean": 0.0013097346891299821, "clip_ratio/low_min": 0.00022087641173129668, "clip_ratio/region_mean": 0.0024638057366246358, "epoch": 0.12128279883381925, "grad_norm": 0.13448752462863922, "learning_rate": 1e-06, "loss": 0.0292, "step": 13 }, { "clip_ratio/high_max": 0.0028096670503146015, "clip_ratio/high_mean": 0.0012107493930670898, "clip_ratio/low_mean": 0.001211751383380033, "clip_ratio/low_min": 0.00017167441728815902, "clip_ratio/region_mean": 0.0024225007655331865, "epoch": 0.1306122448979592, "grad_norm": 0.12716948986053467, "learning_rate": 1e-06, "loss": 0.0234, "step": 14 }, { "clip_ratio/high_max": 0.002425587757898029, "clip_ratio/high_mean": 0.00104737462970661, "clip_ratio/low_mean": 0.0009895519833662547, "clip_ratio/low_min": 8.760188029555138e-05, "clip_ratio/region_mean": 0.0020369266494526528, "epoch": 0.13994169096209913, "grad_norm": 0.12380632013082504, "learning_rate": 1e-06, "loss": 0.0069, "step": 15 }, { "clip_ratio/high_max": 0.0025905574584612623, "clip_ratio/high_mean": 0.0012410984381858725, "clip_ratio/low_mean": 0.0010446769429108826, "clip_ratio/low_min": 0.0001288526018470293, "clip_ratio/region_mean": 0.0022857753647258505, "epoch": 0.14927113702623906, "grad_norm": 0.13263782858848572, "learning_rate": 1e-06, "loss": 0.0004, "step": 16 }, { "clip_ratio/high_max": 0.002322110165550839, "clip_ratio/high_mean": 0.0010622955705912318, "clip_ratio/low_mean": 0.0009947660964826355, "clip_ratio/low_min": 0.00019454384892014787, "clip_ratio/region_mean": 0.0020570616325130686, "epoch": 0.158600583090379, "grad_norm": 0.13527724146842957, "learning_rate": 1e-06, "loss": 0.0222, "step": 17 }, { "clip_ratio/high_max": 0.0030840404360787943, "clip_ratio/high_mean": 0.001330941988271661, "clip_ratio/low_mean": 0.001035875771776773, "clip_ratio/low_min": 0.0001651506663620239, "clip_ratio/region_mean": 0.0023668177746003494, "epoch": 0.16793002915451896, "grad_norm": 0.14462633430957794, "learning_rate": 1e-06, "loss": -0.0272, "step": 18 }, { "clip_ratio/high_max": 0.0024542947721784003, "clip_ratio/high_mean": 0.0012647232906601857, "clip_ratio/low_mean": 0.0010453857139509637, "clip_ratio/low_min": 0.00011290672682662262, "clip_ratio/region_mean": 0.0023101089682313614, "epoch": 0.1772594752186589, "grad_norm": 0.12958800792694092, "learning_rate": 1e-06, "loss": -0.0264, "step": 19 }, { "clip_ratio/high_max": 0.002545717667089775, "clip_ratio/high_mean": 0.0010949806928692851, "clip_ratio/low_mean": 0.0010164689556404483, "clip_ratio/low_min": 9.848788249655627e-05, "clip_ratio/region_mean": 0.0021114496994414367, "epoch": 0.18658892128279883, "grad_norm": 0.11860532313585281, "learning_rate": 1e-06, "loss": 0.0001, "step": 20 }, { "clip_ratio/high_max": 0.0023502288095187396, "clip_ratio/high_mean": 0.0010851689585251734, "clip_ratio/low_mean": 0.0010610801327857189, "clip_ratio/low_min": 0.00015702244400017662, "clip_ratio/region_mean": 0.002146249076758977, "epoch": 0.19591836734693877, "grad_norm": 0.12849994003772736, "learning_rate": 1e-06, "loss": -0.0002, "step": 21 }, { "clip_ratio/high_max": 0.0024970949816633947, "clip_ratio/high_mean": 0.0011011122587660793, "clip_ratio/low_mean": 0.0010701754727051593, "clip_ratio/low_min": 0.00012273165157239418, "clip_ratio/region_mean": 0.0021712877860409208, "epoch": 0.20524781341107873, "grad_norm": 0.12600012123584747, "learning_rate": 1e-06, "loss": -0.0336, "step": 22 }, { "clip_ratio/high_max": 0.0028476907100412063, "clip_ratio/high_mean": 0.001330400391452713, "clip_ratio/low_mean": 0.0010206206570728682, "clip_ratio/low_min": 0.00012209860324219335, "clip_ratio/region_mean": 0.0023510210594395176, "epoch": 0.21457725947521866, "grad_norm": 0.12997882068157196, "learning_rate": 1e-06, "loss": -0.039, "step": 23 }, { "clip_ratio/high_max": 0.002843989888788201, "clip_ratio/high_mean": 0.0012584266005433165, "clip_ratio/low_mean": 0.001166309833934065, "clip_ratio/low_min": 0.00014769538393011317, "clip_ratio/region_mean": 0.002424736405373551, "epoch": 0.2239067055393586, "grad_norm": 0.1239733025431633, "learning_rate": 1e-06, "loss": -0.001, "step": 24 }, { "clip_ratio/high_max": 0.0025600656226743013, "clip_ratio/high_mean": 0.0012093647965230048, "clip_ratio/low_mean": 0.0012888945675513241, "clip_ratio/low_min": 0.00022058806007407838, "clip_ratio/region_mean": 0.002498259309504647, "epoch": 0.23323615160349853, "grad_norm": 0.12248584628105164, "learning_rate": 1e-06, "loss": 0.0448, "step": 25 }, { "clip_ratio/high_max": 0.003117875530733727, "clip_ratio/high_mean": 0.0014248310253606178, "clip_ratio/low_mean": 0.0012216954382893164, "clip_ratio/low_min": 0.0002501699418644421, "clip_ratio/region_mean": 0.0026465264527359977, "epoch": 0.2425655976676385, "grad_norm": 0.14297905564308167, "learning_rate": 1e-06, "loss": -0.0336, "step": 26 }, { "clip_ratio/high_max": 0.0024331261156476103, "clip_ratio/high_mean": 0.0011360391254129354, "clip_ratio/low_mean": 0.0012286992132430896, "clip_ratio/low_min": 0.00022854195776744746, "clip_ratio/region_mean": 0.0023647383277420886, "epoch": 0.2518950437317784, "grad_norm": 0.12130679935216904, "learning_rate": 1e-06, "loss": 0.0061, "step": 27 }, { "clip_ratio/high_max": 0.002945994412584696, "clip_ratio/high_mean": 0.0012949165575264487, "clip_ratio/low_mean": 0.001161579024483217, "clip_ratio/low_min": 0.00013133108677720884, "clip_ratio/region_mean": 0.002456495611113496, "epoch": 0.2612244897959184, "grad_norm": 0.12663106620311737, "learning_rate": 1e-06, "loss": 0.013, "step": 28 }, { "clip_ratio/high_max": 0.002999761069077067, "clip_ratio/high_mean": 0.0014481958205578849, "clip_ratio/low_mean": 0.001349988884612685, "clip_ratio/low_min": 0.00022052016811358044, "clip_ratio/region_mean": 0.0027981847088085487, "epoch": 0.2705539358600583, "grad_norm": 0.12327197194099426, "learning_rate": 1e-06, "loss": -0.0069, "step": 29 }, { "clip_ratio/high_max": 0.0026856460244744085, "clip_ratio/high_mean": 0.001247929823875893, "clip_ratio/low_mean": 0.0012160168189438991, "clip_ratio/low_min": 0.00017969683722185437, "clip_ratio/region_mean": 0.002463946628267877, "epoch": 0.27988338192419826, "grad_norm": 0.11699430644512177, "learning_rate": 1e-06, "loss": -0.0276, "step": 30 }, { "clip_ratio/high_max": 0.002770919840259012, "clip_ratio/high_mean": 0.001350192105746828, "clip_ratio/low_mean": 0.001140667860454414, "clip_ratio/low_min": 0.00015164834076131228, "clip_ratio/region_mean": 0.002490859966201242, "epoch": 0.2892128279883382, "grad_norm": 0.11804838478565216, "learning_rate": 1e-06, "loss": -0.0527, "step": 31 }, { "clip_ratio/high_max": 0.0028033702255925164, "clip_ratio/high_mean": 0.001267817300686147, "clip_ratio/low_mean": 0.0012801564880646765, "clip_ratio/low_min": 0.00023108240202418528, "clip_ratio/region_mean": 0.0025479737741989084, "epoch": 0.29854227405247813, "grad_norm": 0.12966667115688324, "learning_rate": 1e-06, "loss": -0.0081, "step": 32 }, { "clip_ratio/high_max": 0.002397582560661249, "clip_ratio/high_mean": 0.0011626613413682207, "clip_ratio/low_mean": 0.0013735218126385007, "clip_ratio/low_min": 0.00026043377147288993, "clip_ratio/region_mean": 0.0025361831576447003, "epoch": 0.30787172011661806, "grad_norm": 0.1260962039232254, "learning_rate": 1e-06, "loss": 0.0082, "step": 33 }, { "clip_ratio/high_max": 0.002426173727144487, "clip_ratio/high_mean": 0.0012151251939940266, "clip_ratio/low_mean": 0.001366448705084622, "clip_ratio/low_min": 0.00022134304435894592, "clip_ratio/region_mean": 0.0025815738626988605, "epoch": 0.317201166180758, "grad_norm": 0.1335056573152542, "learning_rate": 1e-06, "loss": 0.0099, "step": 34 }, { "clip_ratio/high_max": 0.002810610079905018, "clip_ratio/high_mean": 0.0012503327416197862, "clip_ratio/low_mean": 0.0013840347965015098, "clip_ratio/low_min": 0.00017831472086982103, "clip_ratio/region_mean": 0.0026343676072428934, "epoch": 0.32653061224489793, "grad_norm": 0.1279270052909851, "learning_rate": 1e-06, "loss": 0.0059, "step": 35 }, { "clip_ratio/high_max": 0.0025273438441217877, "clip_ratio/high_mean": 0.001138790245022392, "clip_ratio/low_mean": 0.001418263065716019, "clip_ratio/low_min": 0.00021497471152542857, "clip_ratio/region_mean": 0.0025570532961864956, "epoch": 0.3358600583090379, "grad_norm": 0.13317278027534485, "learning_rate": 1e-06, "loss": 0.0212, "step": 36 }, { "clip_ratio/high_max": 0.0025164432227029465, "clip_ratio/high_mean": 0.0011276436598564032, "clip_ratio/low_mean": 0.0013073582776996773, "clip_ratio/low_min": 0.00022627342241321458, "clip_ratio/region_mean": 0.0024350019739358686, "epoch": 0.34518950437317786, "grad_norm": 0.13060995936393738, "learning_rate": 1e-06, "loss": 0.0039, "step": 37 }, { "clip_ratio/high_max": 0.0028637529103434645, "clip_ratio/high_mean": 0.00132327246319619, "clip_ratio/low_mean": 0.001336863530013943, "clip_ratio/low_min": 0.00024114352345350198, "clip_ratio/region_mean": 0.002660136087797582, "epoch": 0.3545189504373178, "grad_norm": 0.1329226940870285, "learning_rate": 1e-06, "loss": 0.0076, "step": 38 }, { "clip_ratio/high_max": 0.0025575220279279165, "clip_ratio/high_mean": 0.0012403778309817426, "clip_ratio/low_mean": 0.001399258468154585, "clip_ratio/low_min": 0.0002571259856267716, "clip_ratio/region_mean": 0.0026396363173262216, "epoch": 0.3638483965014577, "grad_norm": 0.12120100110769272, "learning_rate": 1e-06, "loss": 0.0232, "step": 39 }, { "clip_ratio/high_max": 0.0024824743013596162, "clip_ratio/high_mean": 0.0011955045811191667, "clip_ratio/low_mean": 0.0012919128421344794, "clip_ratio/low_min": 0.00021298496631061425, "clip_ratio/region_mean": 0.0024874173614080064, "epoch": 0.37317784256559766, "grad_norm": 0.122945636510849, "learning_rate": 1e-06, "loss": -0.0113, "step": 40 }, { "clip_ratio/high_max": 0.002869045907573309, "clip_ratio/high_mean": 0.001285800724872388, "clip_ratio/low_mean": 0.0015269205105141737, "clip_ratio/low_min": 0.00028516309794213157, "clip_ratio/region_mean": 0.002812721228110604, "epoch": 0.3825072886297376, "grad_norm": 0.12388288229703903, "learning_rate": 1e-06, "loss": 0.0419, "step": 41 }, { "clip_ratio/high_max": 0.0029178798286011443, "clip_ratio/high_mean": 0.0012012958577543031, "clip_ratio/low_mean": 0.0012740831043629441, "clip_ratio/low_min": 5.371477254811907e-05, "clip_ratio/region_mean": 0.002475378933013417, "epoch": 0.39183673469387753, "grad_norm": 0.12117232382297516, "learning_rate": 1e-06, "loss": 0.009, "step": 42 }, { "clip_ratio/high_max": 0.002626564892125316, "clip_ratio/high_mean": 0.0012901496957056224, "clip_ratio/low_mean": 0.0014641547277278733, "clip_ratio/low_min": 0.00010231374562863493, "clip_ratio/region_mean": 0.0027543044125195593, "epoch": 0.40116618075801747, "grad_norm": 0.12484277039766312, "learning_rate": 1e-06, "loss": 0.0042, "step": 43 }, { "clip_ratio/high_max": 0.0026751656587293837, "clip_ratio/high_mean": 0.0013287226647662465, "clip_ratio/low_mean": 0.0013269794908410404, "clip_ratio/low_min": 0.0003367429753780016, "clip_ratio/region_mean": 0.002655702155607287, "epoch": 0.41049562682215746, "grad_norm": 0.12827810645103455, "learning_rate": 1e-06, "loss": -0.0553, "step": 44 }, { "clip_ratio/high_max": 0.002334374737984035, "clip_ratio/high_mean": 0.0010745539257186465, "clip_ratio/low_mean": 0.001300467600231059, "clip_ratio/low_min": 0.00024644834047649056, "clip_ratio/region_mean": 0.0023750215186737478, "epoch": 0.4198250728862974, "grad_norm": 0.13005675375461578, "learning_rate": 1e-06, "loss": 0.0297, "step": 45 }, { "clip_ratio/high_max": 0.002404369901341852, "clip_ratio/high_mean": 0.0011186322062712861, "clip_ratio/low_mean": 0.0014560700765287038, "clip_ratio/low_min": 0.00018331499268242624, "clip_ratio/region_mean": 0.0025747022591531277, "epoch": 0.4291545189504373, "grad_norm": 0.1254495084285736, "learning_rate": 1e-06, "loss": 0.0067, "step": 46 }, { "clip_ratio/high_max": 0.0028180109220556915, "clip_ratio/high_mean": 0.0012702910062216688, "clip_ratio/low_mean": 0.0012854832057200838, "clip_ratio/low_min": 0.00023281864105229033, "clip_ratio/region_mean": 0.0025557741901138797, "epoch": 0.43848396501457726, "grad_norm": 0.13360561430454254, "learning_rate": 1e-06, "loss": -0.0471, "step": 47 }, { "clip_ratio/high_max": 0.002505217395082582, "clip_ratio/high_mean": 0.001214497637192835, "clip_ratio/low_mean": 0.0014355284001794644, "clip_ratio/low_min": 0.00041710363802849315, "clip_ratio/region_mean": 0.0026500260792090558, "epoch": 0.4478134110787172, "grad_norm": 0.1364157795906067, "learning_rate": 1e-06, "loss": 0.0439, "step": 48 }, { "clip_ratio/high_max": 0.002508827055862639, "clip_ratio/high_mean": 0.0011414676773711108, "clip_ratio/low_mean": 0.0014058454689802602, "clip_ratio/low_min": 0.0002719927506404929, "clip_ratio/region_mean": 0.002547313124523498, "epoch": 0.45714285714285713, "grad_norm": 0.12717197835445404, "learning_rate": 1e-06, "loss": 0.0095, "step": 49 }, { "clip_ratio/high_max": 0.002623106040118728, "clip_ratio/high_mean": 0.0012397630598570686, "clip_ratio/low_mean": 0.0014246280334191397, "clip_ratio/low_min": 0.0001757342561177211, "clip_ratio/region_mean": 0.0026643910532584414, "epoch": 0.46647230320699706, "grad_norm": 0.13832078874111176, "learning_rate": 1e-06, "loss": -0.0098, "step": 50 }, { "clip_ratio/high_max": 0.0022491210911539383, "clip_ratio/high_mean": 0.0009918743453454226, "clip_ratio/low_mean": 0.0014475052776106168, "clip_ratio/low_min": 0.00035459636001178296, "clip_ratio/region_mean": 0.0024393795829382725, "epoch": 0.47580174927113705, "grad_norm": 0.13653312623500824, "learning_rate": 1e-06, "loss": 0.0559, "step": 51 }, { "clip_ratio/high_max": 0.002902555323089473, "clip_ratio/high_mean": 0.0014078625245019794, "clip_ratio/low_mean": 0.0015024737840576563, "clip_ratio/low_min": 0.00015843062828935217, "clip_ratio/region_mean": 0.0029103363849571906, "epoch": 0.485131195335277, "grad_norm": 0.135065495967865, "learning_rate": 1e-06, "loss": -0.0427, "step": 52 }, { "clip_ratio/high_max": 0.002970592620840762, "clip_ratio/high_mean": 0.001307408521824982, "clip_ratio/low_mean": 0.0016858880917425267, "clip_ratio/low_min": 0.00040222108418674907, "clip_ratio/region_mean": 0.0029932965408079326, "epoch": 0.4944606413994169, "grad_norm": 0.12439320981502533, "learning_rate": 1e-06, "loss": 0.0004, "step": 53 }, { "clip_ratio/high_max": 0.00270408335927641, "clip_ratio/high_mean": 0.0011514070811244892, "clip_ratio/low_mean": 0.001619567414309131, "clip_ratio/low_min": 0.00038420907822001027, "clip_ratio/region_mean": 0.002770974548184313, "epoch": 0.5037900874635568, "grad_norm": 0.11202271282672882, "learning_rate": 1e-06, "loss": 0.0129, "step": 54 }, { "clip_ratio/high_max": 0.0030225567606976256, "clip_ratio/high_mean": 0.0012184053775854409, "clip_ratio/low_mean": 0.0016841035976540297, "clip_ratio/low_min": 0.0003986812216680846, "clip_ratio/region_mean": 0.002902509004343301, "epoch": 0.5131195335276968, "grad_norm": 0.12663580477237701, "learning_rate": 1e-06, "loss": 0.022, "step": 55 }, { "clip_ratio/high_max": 0.0027979429287370294, "clip_ratio/high_mean": 0.0012466053067328176, "clip_ratio/low_mean": 0.0015858514307183214, "clip_ratio/low_min": 0.00025280885984102497, "clip_ratio/region_mean": 0.0028324567392701283, "epoch": 0.5224489795918368, "grad_norm": 0.12395124137401581, "learning_rate": 1e-06, "loss": 0.0161, "step": 56 }, { "clip_ratio/high_max": 0.0025067654423764907, "clip_ratio/high_mean": 0.001166765494417632, "clip_ratio/low_mean": 0.0014032908948138356, "clip_ratio/low_min": 0.000294826317258412, "clip_ratio/region_mean": 0.0025700564074213617, "epoch": 0.5317784256559767, "grad_norm": 0.12397922575473785, "learning_rate": 1e-06, "loss": -0.0104, "step": 57 }, { "clip_ratio/high_max": 0.002598407110781409, "clip_ratio/high_mean": 0.0012452545888663735, "clip_ratio/low_mean": 0.0013761853988398798, "clip_ratio/low_min": 8.24566468509147e-05, "clip_ratio/region_mean": 0.002621439976792317, "epoch": 0.5411078717201167, "grad_norm": 0.11927986145019531, "learning_rate": 1e-06, "loss": 0.0114, "step": 58 }, { "clip_ratio/high_max": 0.002735268135438673, "clip_ratio/high_mean": 0.001259966225916287, "clip_ratio/low_mean": 0.0016105202994367573, "clip_ratio/low_min": 0.0002903498934756499, "clip_ratio/region_mean": 0.0028704865399049595, "epoch": 0.5504373177842565, "grad_norm": 0.13499929010868073, "learning_rate": 1e-06, "loss": 0.0192, "step": 59 }, { "clip_ratio/high_max": 0.002663150749867782, "clip_ratio/high_mean": 0.0013437504931061994, "clip_ratio/low_mean": 0.0012987726768187713, "clip_ratio/low_min": 0.00014798885058553424, "clip_ratio/region_mean": 0.002642523162649013, "epoch": 0.5597667638483965, "grad_norm": 0.11873684078454971, "learning_rate": 1e-06, "loss": -0.0634, "step": 60 }, { "clip_ratio/high_max": 0.0028377935814205557, "clip_ratio/high_mean": 0.0012613039725692943, "clip_ratio/low_mean": 0.0013451987870212179, "clip_ratio/low_min": 7.511279727623332e-05, "clip_ratio/region_mean": 0.002606502835988067, "epoch": 0.5690962099125364, "grad_norm": 0.12681037187576294, "learning_rate": 1e-06, "loss": 0.0163, "step": 61 }, { "clip_ratio/high_max": 0.0026151974452659488, "clip_ratio/high_mean": 0.0012464570500014815, "clip_ratio/low_mean": 0.0014267960323195439, "clip_ratio/low_min": 0.00012743554179905914, "clip_ratio/region_mean": 0.002673253125976771, "epoch": 0.5784256559766764, "grad_norm": 0.13074587285518646, "learning_rate": 1e-06, "loss": 0.0177, "step": 62 }, { "clip_ratio/high_max": 0.0027048191550420597, "clip_ratio/high_mean": 0.001161007228802191, "clip_ratio/low_mean": 0.0015286516900232527, "clip_ratio/low_min": 0.00031732848583487794, "clip_ratio/region_mean": 0.002689658969757147, "epoch": 0.5877551020408164, "grad_norm": 0.1284467577934265, "learning_rate": 1e-06, "loss": 0.0316, "step": 63 }, { "clip_ratio/high_max": 0.0025533259758958593, "clip_ratio/high_mean": 0.001261223471374251, "clip_ratio/low_mean": 0.0014119726583885495, "clip_ratio/low_min": 0.0001766451823641546, "clip_ratio/region_mean": 0.0026731961042969488, "epoch": 0.5970845481049563, "grad_norm": 0.11957770586013794, "learning_rate": 1e-06, "loss": 0.0072, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015223911830357095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 611.696044921875, "completions/mean_terminated_length": 557.831298828125, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 1.00932944606414, "grad_norm": 0.14397354423999786, "learning_rate": 1e-06, "loss": -0.008, "num_tokens": 73998447.0, "reward": 0.518798828125, "reward_std": 0.24383334815502167, "rewards/simpleverify_reward/mean": 0.518798828125, "rewards/simpleverify_reward/std": 0.499650776386261, "step": 65 }, { "clip_ratio/high_max": 0.0021691659931093454, "clip_ratio/high_mean": 0.000986040557108936, "clip_ratio/low_mean": 0.0006652650536125293, "clip_ratio/low_min": 2.287994448124664e-05, "clip_ratio/region_mean": 0.0016513056034455076, "epoch": 1.01865889212828, "grad_norm": 0.1515653133392334, "learning_rate": 1e-06, "loss": -0.0171, "step": 66 }, { "clip_ratio/high_max": 0.0018939887231681496, "clip_ratio/high_mean": 0.0009140307156485505, "clip_ratio/low_mean": 0.0006537802073580679, "clip_ratio/low_min": 6.565342209796654e-05, "clip_ratio/region_mean": 0.0015678109630243853, "epoch": 1.0279883381924197, "grad_norm": 0.13714466989040375, "learning_rate": 1e-06, "loss": -0.0007, "step": 67 }, { "clip_ratio/high_max": 0.0023807338438928127, "clip_ratio/high_mean": 0.0009935587368090637, "clip_ratio/low_mean": 0.0007937938444229076, "clip_ratio/low_min": 1.8679020286072046e-05, "clip_ratio/region_mean": 0.0017873525648610666, "epoch": 1.0373177842565597, "grad_norm": 0.13353446125984192, "learning_rate": 1e-06, "loss": 0.0018, "step": 68 }, { "clip_ratio/high_max": 0.002437128023302648, "clip_ratio/high_mean": 0.0011547244575922377, "clip_ratio/low_mean": 0.0006415299794753082, "clip_ratio/low_min": 5.737626906920923e-05, "clip_ratio/region_mean": 0.0017962544807232916, "epoch": 1.0466472303206997, "grad_norm": 0.12103041261434555, "learning_rate": 1e-06, "loss": -0.0682, "step": 69 }, { "clip_ratio/high_max": 0.002175499241275247, "clip_ratio/high_mean": 0.0010054696431325283, "clip_ratio/low_mean": 0.0010076910075440537, "clip_ratio/low_min": 0.00017112510613515042, "clip_ratio/region_mean": 0.0020131606652284972, "epoch": 1.0559766763848397, "grad_norm": 0.1344999372959137, "learning_rate": 1e-06, "loss": 0.0129, "step": 70 }, { "clip_ratio/high_max": 0.002760676587058697, "clip_ratio/high_mean": 0.001276401770155644, "clip_ratio/low_mean": 0.0011189830875082407, "clip_ratio/low_min": 8.982975941762561e-05, "clip_ratio/region_mean": 0.002395384850387927, "epoch": 1.0653061224489795, "grad_norm": 0.12529118359088898, "learning_rate": 1e-06, "loss": -0.0272, "step": 71 }, { "clip_ratio/high_max": 0.002425005626719212, "clip_ratio/high_mean": 0.0012668097879213747, "clip_ratio/low_mean": 0.001233299874002114, "clip_ratio/low_min": 0.00016012470769055653, "clip_ratio/region_mean": 0.002500109672837425, "epoch": 1.0746355685131195, "grad_norm": 0.12796109914779663, "learning_rate": 1e-06, "loss": -0.0255, "step": 72 }, { "clip_ratio/high_max": 0.0027319155851728283, "clip_ratio/high_mean": 0.0012632101424969733, "clip_ratio/low_mean": 0.001098712942621205, "clip_ratio/low_min": 8.281663940579165e-05, "clip_ratio/region_mean": 0.0023619230996700935, "epoch": 1.0839650145772595, "grad_norm": 0.13395914435386658, "learning_rate": 1e-06, "loss": 0.0012, "step": 73 }, { "clip_ratio/high_max": 0.002780590672045946, "clip_ratio/high_mean": 0.0012711367890005931, "clip_ratio/low_mean": 0.001221397178596817, "clip_ratio/low_min": 0.00021613473745674128, "clip_ratio/region_mean": 0.0024925339603214525, "epoch": 1.0932944606413995, "grad_norm": 0.1335933655500412, "learning_rate": 1e-06, "loss": -0.0042, "step": 74 }, { "clip_ratio/high_max": 0.0024426456511719152, "clip_ratio/high_mean": 0.0010831852414412424, "clip_ratio/low_mean": 0.0012574343418236822, "clip_ratio/low_min": 0.00013535620473703602, "clip_ratio/region_mean": 0.0023406196196447127, "epoch": 1.1026239067055394, "grad_norm": 0.12600579857826233, "learning_rate": 1e-06, "loss": 0.0296, "step": 75 }, { "clip_ratio/high_max": 0.002680013974895701, "clip_ratio/high_mean": 0.0011871287460962776, "clip_ratio/low_mean": 0.0013076191280561034, "clip_ratio/low_min": 0.00019419438103795983, "clip_ratio/region_mean": 0.002494747932360042, "epoch": 1.1119533527696792, "grad_norm": 0.1342019885778427, "learning_rate": 1e-06, "loss": 0.0173, "step": 76 }, { "clip_ratio/high_max": 0.00274390618869802, "clip_ratio/high_mean": 0.0011277977500867564, "clip_ratio/low_mean": 0.001229974255693378, "clip_ratio/low_min": 0.00015567049013043288, "clip_ratio/region_mean": 0.0023577720057801344, "epoch": 1.1212827988338192, "grad_norm": 0.12711849808692932, "learning_rate": 1e-06, "loss": 0.0563, "step": 77 }, { "clip_ratio/high_max": 0.00250905611392227, "clip_ratio/high_mean": 0.0011992687723250128, "clip_ratio/low_mean": 0.0009937717040884309, "clip_ratio/low_min": 0.00012563654308905825, "clip_ratio/region_mean": 0.0021930404982413165, "epoch": 1.1306122448979592, "grad_norm": 0.12553954124450684, "learning_rate": 1e-06, "loss": 0.0198, "step": 78 }, { "clip_ratio/high_max": 0.002618203783640638, "clip_ratio/high_mean": 0.0011074682588514406, "clip_ratio/low_mean": 0.0010921286484517623, "clip_ratio/low_min": 0.0001569984806337743, "clip_ratio/region_mean": 0.002199596907303203, "epoch": 1.1399416909620992, "grad_norm": 0.12352953851222992, "learning_rate": 1e-06, "loss": 0.0117, "step": 79 }, { "clip_ratio/high_max": 0.0024745419214013964, "clip_ratio/high_mean": 0.001142636134318309, "clip_ratio/low_mean": 0.0010781606142700184, "clip_ratio/low_min": 0.00022882952453073813, "clip_ratio/region_mean": 0.0022207967122085392, "epoch": 1.149271137026239, "grad_norm": 0.13095960021018982, "learning_rate": 1e-06, "loss": 0.0198, "step": 80 }, { "clip_ratio/high_max": 0.0026096545552718453, "clip_ratio/high_mean": 0.0012076985840394627, "clip_ratio/low_mean": 0.0010498119991098065, "clip_ratio/low_min": 0.00014993723016232252, "clip_ratio/region_mean": 0.0022575106195290573, "epoch": 1.158600583090379, "grad_norm": 0.1313534677028656, "learning_rate": 1e-06, "loss": -0.0, "step": 81 }, { "clip_ratio/high_max": 0.0024646231322549284, "clip_ratio/high_mean": 0.0010702001382014714, "clip_ratio/low_mean": 0.0008362182961718645, "clip_ratio/low_min": 5.304603291733656e-05, "clip_ratio/region_mean": 0.00190641845256323, "epoch": 1.167930029154519, "grad_norm": 0.11646479368209839, "learning_rate": 1e-06, "loss": 0.006, "step": 82 }, { "clip_ratio/high_max": 0.0026362404896644875, "clip_ratio/high_mean": 0.0011965960220550187, "clip_ratio/low_mean": 0.0010191613037022762, "clip_ratio/low_min": 9.004955882119248e-05, "clip_ratio/region_mean": 0.0022157572966534644, "epoch": 1.177259475218659, "grad_norm": 0.1397159844636917, "learning_rate": 1e-06, "loss": 0.0168, "step": 83 }, { "clip_ratio/high_max": 0.002594729870907031, "clip_ratio/high_mean": 0.0011606750231294427, "clip_ratio/low_mean": 0.001007588787615532, "clip_ratio/low_min": 0.00011338917374814628, "clip_ratio/region_mean": 0.002168263803469017, "epoch": 1.186588921282799, "grad_norm": 0.116698257625103, "learning_rate": 1e-06, "loss": -0.0184, "step": 84 }, { "clip_ratio/high_max": 0.002944628540717531, "clip_ratio/high_mean": 0.0012051014819007833, "clip_ratio/low_mean": 0.0009211932047037408, "clip_ratio/low_min": 9.40364752750611e-05, "clip_ratio/region_mean": 0.002126294632034842, "epoch": 1.1959183673469387, "grad_norm": 0.12869130074977875, "learning_rate": 1e-06, "loss": -0.0503, "step": 85 }, { "clip_ratio/high_max": 0.003023597426363267, "clip_ratio/high_mean": 0.0012567652265715878, "clip_ratio/low_mean": 0.0010700837374315597, "clip_ratio/low_min": 0.00016499041339557152, "clip_ratio/region_mean": 0.0023268489676411264, "epoch": 1.2052478134110787, "grad_norm": 0.1386902779340744, "learning_rate": 1e-06, "loss": -0.0133, "step": 86 }, { "clip_ratio/high_max": 0.0020999868429498747, "clip_ratio/high_mean": 0.0010495151036593597, "clip_ratio/low_mean": 0.0013269814935483737, "clip_ratio/low_min": 0.00025912320597853977, "clip_ratio/region_mean": 0.0023764966026647016, "epoch": 1.2145772594752187, "grad_norm": 0.11641263216733932, "learning_rate": 1e-06, "loss": 0.0314, "step": 87 }, { "clip_ratio/high_max": 0.002789079662761651, "clip_ratio/high_mean": 0.001221463731781114, "clip_ratio/low_mean": 0.001243677568709245, "clip_ratio/low_min": 6.885257789690513e-05, "clip_ratio/region_mean": 0.0024651413405081257, "epoch": 1.2239067055393587, "grad_norm": 0.13545332849025726, "learning_rate": 1e-06, "loss": 0.0046, "step": 88 }, { "clip_ratio/high_max": 0.0023673701580264606, "clip_ratio/high_mean": 0.001076894306606846, "clip_ratio/low_mean": 0.0011390012023184681, "clip_ratio/low_min": 5.4253629059530795e-05, "clip_ratio/region_mean": 0.0022158954889164306, "epoch": 1.2332361516034984, "grad_norm": 0.12344934791326523, "learning_rate": 1e-06, "loss": -0.027, "step": 89 }, { "clip_ratio/high_max": 0.002265146693389397, "clip_ratio/high_mean": 0.0010651956836227328, "clip_ratio/low_mean": 0.0013935370188846719, "clip_ratio/low_min": 0.00024814777134452015, "clip_ratio/region_mean": 0.0024587327061453834, "epoch": 1.2425655976676384, "grad_norm": 0.1261015087366104, "learning_rate": 1e-06, "loss": 0.0214, "step": 90 }, { "clip_ratio/high_max": 0.0024361753421544563, "clip_ratio/high_mean": 0.0010475805756868795, "clip_ratio/low_mean": 0.0014298978239821736, "clip_ratio/low_min": 0.0002577286486484809, "clip_ratio/region_mean": 0.0024774784251349047, "epoch": 1.2518950437317784, "grad_norm": 0.11616481840610504, "learning_rate": 1e-06, "loss": 0.0076, "step": 91 }, { "clip_ratio/high_max": 0.0026217842314508744, "clip_ratio/high_mean": 0.0012180431076558307, "clip_ratio/low_mean": 0.0014414700308407191, "clip_ratio/low_min": 0.00017707774895825423, "clip_ratio/region_mean": 0.0026595131057547405, "epoch": 1.2612244897959184, "grad_norm": 0.12859784066677094, "learning_rate": 1e-06, "loss": -0.0164, "step": 92 }, { "clip_ratio/high_max": 0.002220765261881752, "clip_ratio/high_mean": 0.0010178859138250118, "clip_ratio/low_mean": 0.0013474979423335753, "clip_ratio/low_min": 0.00016744094500609208, "clip_ratio/region_mean": 0.002365383850701619, "epoch": 1.2705539358600584, "grad_norm": 0.1287926584482193, "learning_rate": 1e-06, "loss": 0.024, "step": 93 }, { "clip_ratio/high_max": 0.0025237340450985357, "clip_ratio/high_mean": 0.0011239920822845306, "clip_ratio/low_mean": 0.0013649618667841423, "clip_ratio/low_min": 0.00015807248564669862, "clip_ratio/region_mean": 0.002488953970896546, "epoch": 1.2798833819241984, "grad_norm": 0.13355687260627747, "learning_rate": 1e-06, "loss": 0.0121, "step": 94 }, { "clip_ratio/high_max": 0.002789656529785134, "clip_ratio/high_mean": 0.0011940061158384196, "clip_ratio/low_mean": 0.0012921697198180482, "clip_ratio/low_min": 0.00013409827261057217, "clip_ratio/region_mean": 0.0024861758211045526, "epoch": 1.2892128279883381, "grad_norm": 0.11790642887353897, "learning_rate": 1e-06, "loss": 0.0033, "step": 95 }, { "clip_ratio/high_max": 0.0025969174603233114, "clip_ratio/high_mean": 0.0011176285879628267, "clip_ratio/low_mean": 0.0013295766366354655, "clip_ratio/low_min": 0.00017343667241220828, "clip_ratio/region_mean": 0.0024472052027704194, "epoch": 1.2985422740524781, "grad_norm": 0.12013210356235504, "learning_rate": 1e-06, "loss": 0.0186, "step": 96 }, { "clip_ratio/high_max": 0.002730941509071272, "clip_ratio/high_mean": 0.0012301779170229565, "clip_ratio/low_mean": 0.0013543912064051256, "clip_ratio/low_min": 0.00025461651785008144, "clip_ratio/region_mean": 0.002584569127066061, "epoch": 1.3078717201166181, "grad_norm": 0.13513752818107605, "learning_rate": 1e-06, "loss": 0.0166, "step": 97 }, { "clip_ratio/high_max": 0.00276844174368307, "clip_ratio/high_mean": 0.0012111759460822213, "clip_ratio/low_mean": 0.0012890180332760792, "clip_ratio/low_min": 0.00014023223957337905, "clip_ratio/region_mean": 0.0025001940230140463, "epoch": 1.3172011661807579, "grad_norm": 0.12273237109184265, "learning_rate": 1e-06, "loss": -0.0035, "step": 98 }, { "clip_ratio/high_max": 0.0023963413841556758, "clip_ratio/high_mean": 0.0010976555058732629, "clip_ratio/low_mean": 0.0012034707215207163, "clip_ratio/low_min": 0.000233849114010809, "clip_ratio/region_mean": 0.002301126216480043, "epoch": 1.3265306122448979, "grad_norm": 0.11905274540185928, "learning_rate": 1e-06, "loss": 0.03, "step": 99 }, { "clip_ratio/high_max": 0.0026407210316392593, "clip_ratio/high_mean": 0.001146331040217774, "clip_ratio/low_mean": 0.0012838526636187453, "clip_ratio/low_min": 0.00028504238434834406, "clip_ratio/region_mean": 0.002430183667456731, "epoch": 1.3358600583090379, "grad_norm": 0.10808532685041428, "learning_rate": 1e-06, "loss": -0.0008, "step": 100 }, { "clip_ratio/high_max": 0.002646134111273568, "clip_ratio/high_mean": 0.001193745136333746, "clip_ratio/low_mean": 0.0011777418985730037, "clip_ratio/low_min": 0.00012790255914296722, "clip_ratio/region_mean": 0.0023714870258118026, "epoch": 1.3451895043731779, "grad_norm": 0.1202731505036354, "learning_rate": 1e-06, "loss": -0.0177, "step": 101 }, { "clip_ratio/high_max": 0.002772345149423927, "clip_ratio/high_mean": 0.0012068417308910284, "clip_ratio/low_mean": 0.0013185823318053735, "clip_ratio/low_min": 0.0002663707937244908, "clip_ratio/region_mean": 0.0025254240608774126, "epoch": 1.3545189504373178, "grad_norm": 0.1238364428281784, "learning_rate": 1e-06, "loss": 0.0313, "step": 102 }, { "clip_ratio/high_max": 0.0025565283285686746, "clip_ratio/high_mean": 0.001128703574067913, "clip_ratio/low_mean": 0.001262762634723913, "clip_ratio/low_min": 0.00019348273417563178, "clip_ratio/region_mean": 0.002391466245171614, "epoch": 1.3638483965014578, "grad_norm": 0.12007968127727509, "learning_rate": 1e-06, "loss": 0.0374, "step": 103 }, { "clip_ratio/high_max": 0.002913759570219554, "clip_ratio/high_mean": 0.0012902501111966558, "clip_ratio/low_mean": 0.001323579916061135, "clip_ratio/low_min": 0.00014899642701493576, "clip_ratio/region_mean": 0.0026138300163438544, "epoch": 1.3731778425655976, "grad_norm": 0.11830270290374756, "learning_rate": 1e-06, "loss": -0.0538, "step": 104 }, { "clip_ratio/high_max": 0.002359229707508348, "clip_ratio/high_mean": 0.001132728015363682, "clip_ratio/low_mean": 0.0014911993275745772, "clip_ratio/low_min": 0.0002770377668639412, "clip_ratio/region_mean": 0.002623927350214217, "epoch": 1.3825072886297376, "grad_norm": 0.13317887485027313, "learning_rate": 1e-06, "loss": 0.0613, "step": 105 }, { "clip_ratio/high_max": 0.002849008917110041, "clip_ratio/high_mean": 0.0013902151185902767, "clip_ratio/low_mean": 0.0012934510014019907, "clip_ratio/low_min": 0.00018193295545643196, "clip_ratio/region_mean": 0.002683666090888437, "epoch": 1.3918367346938776, "grad_norm": 0.1259160339832306, "learning_rate": 1e-06, "loss": -0.0508, "step": 106 }, { "clip_ratio/high_max": 0.0025456000585108995, "clip_ratio/high_mean": 0.0010939477979263756, "clip_ratio/low_mean": 0.0012786947954737116, "clip_ratio/low_min": 0.00023171612610894954, "clip_ratio/region_mean": 0.0023726425642962568, "epoch": 1.4011661807580174, "grad_norm": 0.12226609140634537, "learning_rate": 1e-06, "loss": 0.0334, "step": 107 }, { "clip_ratio/high_max": 0.0025956555691664107, "clip_ratio/high_mean": 0.0012804821199097205, "clip_ratio/low_mean": 0.001096187344955979, "clip_ratio/low_min": 4.2086021494469605e-05, "clip_ratio/region_mean": 0.0023766695157974027, "epoch": 1.4104956268221573, "grad_norm": 0.12037496268749237, "learning_rate": 1e-06, "loss": -0.0331, "step": 108 }, { "clip_ratio/high_max": 0.002631728450069204, "clip_ratio/high_mean": 0.0013049101835349575, "clip_ratio/low_mean": 0.0012595389234775212, "clip_ratio/low_min": 0.00012353983856883133, "clip_ratio/region_mean": 0.0025644490160630085, "epoch": 1.4198250728862973, "grad_norm": 0.11882280558347702, "learning_rate": 1e-06, "loss": -0.0165, "step": 109 }, { "clip_ratio/high_max": 0.002339031118026469, "clip_ratio/high_mean": 0.0010034456208813936, "clip_ratio/low_mean": 0.0013370133092394099, "clip_ratio/low_min": 0.00019935806267312728, "clip_ratio/region_mean": 0.0023404588937410153, "epoch": 1.4291545189504373, "grad_norm": 0.11937922984361649, "learning_rate": 1e-06, "loss": 0.0289, "step": 110 }, { "clip_ratio/high_max": 0.0025980642822105438, "clip_ratio/high_mean": 0.001102815480408026, "clip_ratio/low_mean": 0.0013320943726284895, "clip_ratio/low_min": 0.00018518740307627013, "clip_ratio/region_mean": 0.0024349098675884306, "epoch": 1.4384839650145773, "grad_norm": 0.12355878204107285, "learning_rate": 1e-06, "loss": 0.0311, "step": 111 }, { "clip_ratio/high_max": 0.0027734357863664627, "clip_ratio/high_mean": 0.00125026136811357, "clip_ratio/low_mean": 0.0013873162351956125, "clip_ratio/low_min": 0.00022643257216259371, "clip_ratio/region_mean": 0.0026375776360509917, "epoch": 1.4478134110787173, "grad_norm": 0.13517841696739197, "learning_rate": 1e-06, "loss": -0.0237, "step": 112 }, { "clip_ratio/high_max": 0.0028571598086273298, "clip_ratio/high_mean": 0.0012286463352211285, "clip_ratio/low_mean": 0.0012820788542740047, "clip_ratio/low_min": 0.00010312809172319248, "clip_ratio/region_mean": 0.002510725178581197, "epoch": 1.457142857142857, "grad_norm": 0.13367316126823425, "learning_rate": 1e-06, "loss": -0.0258, "step": 113 }, { "clip_ratio/high_max": 0.0026264126936439425, "clip_ratio/high_mean": 0.0013097691626171581, "clip_ratio/low_mean": 0.0011364430793037172, "clip_ratio/low_min": 0.0001136418832174968, "clip_ratio/region_mean": 0.002446212231006939, "epoch": 1.466472303206997, "grad_norm": 0.12540413439273834, "learning_rate": 1e-06, "loss": -0.0729, "step": 114 }, { "clip_ratio/high_max": 0.0027379567764000967, "clip_ratio/high_mean": 0.001255874401977053, "clip_ratio/low_mean": 0.0014245363781810738, "clip_ratio/low_min": 0.00013933339687355328, "clip_ratio/region_mean": 0.0026804107474163175, "epoch": 1.475801749271137, "grad_norm": 0.12620235979557037, "learning_rate": 1e-06, "loss": -0.0501, "step": 115 }, { "clip_ratio/high_max": 0.0027128457950311713, "clip_ratio/high_mean": 0.001143619228969328, "clip_ratio/low_mean": 0.0015026697838038672, "clip_ratio/low_min": 0.0003423382295295596, "clip_ratio/region_mean": 0.0026462890236871317, "epoch": 1.485131195335277, "grad_norm": 0.13259916007518768, "learning_rate": 1e-06, "loss": 0.0598, "step": 116 }, { "clip_ratio/high_max": 0.002739972267590929, "clip_ratio/high_mean": 0.0011430823360569775, "clip_ratio/low_mean": 0.00152621894449112, "clip_ratio/low_min": 0.00020008663068438182, "clip_ratio/region_mean": 0.0026693012623582035, "epoch": 1.4944606413994168, "grad_norm": 0.1265629231929779, "learning_rate": 1e-06, "loss": 0.0165, "step": 117 }, { "clip_ratio/high_max": 0.0026464135371497832, "clip_ratio/high_mean": 0.0011411299274186604, "clip_ratio/low_mean": 0.0016658984532114118, "clip_ratio/low_min": 0.0003561044750313158, "clip_ratio/region_mean": 0.002807028402457945, "epoch": 1.5037900874635568, "grad_norm": 0.12272118777036667, "learning_rate": 1e-06, "loss": 0.0238, "step": 118 }, { "clip_ratio/high_max": 0.0023708494918537326, "clip_ratio/high_mean": 0.001126310646213824, "clip_ratio/low_mean": 0.001422655645001214, "clip_ratio/low_min": 0.0002803808511089301, "clip_ratio/region_mean": 0.0025489662803011015, "epoch": 1.5131195335276968, "grad_norm": 0.11832080781459808, "learning_rate": 1e-06, "loss": 0.0022, "step": 119 }, { "clip_ratio/high_max": 0.0027115305565530434, "clip_ratio/high_mean": 0.0011410775823605945, "clip_ratio/low_mean": 0.001546949762996519, "clip_ratio/low_min": 0.00015985599748091772, "clip_ratio/region_mean": 0.00268802738719387, "epoch": 1.5224489795918368, "grad_norm": 0.12926329672336578, "learning_rate": 1e-06, "loss": -0.0014, "step": 120 }, { "clip_ratio/high_max": 0.0024924994722823612, "clip_ratio/high_mean": 0.0011002993305737618, "clip_ratio/low_mean": 0.0016539994103368372, "clip_ratio/low_min": 0.0002064968457489158, "clip_ratio/region_mean": 0.0027542986936168745, "epoch": 1.5317784256559768, "grad_norm": 0.12762393057346344, "learning_rate": 1e-06, "loss": 0.0508, "step": 121 }, { "clip_ratio/high_max": 0.0027188550302525982, "clip_ratio/high_mean": 0.0011645578233583365, "clip_ratio/low_mean": 0.001327578615018865, "clip_ratio/low_min": 0.00012605087704287143, "clip_ratio/region_mean": 0.002492136452929117, "epoch": 1.5411078717201168, "grad_norm": 0.11825843900442123, "learning_rate": 1e-06, "loss": -0.0314, "step": 122 }, { "clip_ratio/high_max": 0.0026440328801982105, "clip_ratio/high_mean": 0.001086208168999292, "clip_ratio/low_mean": 0.0014585714488930535, "clip_ratio/low_min": 0.00016493189650645945, "clip_ratio/region_mean": 0.002544779628806282, "epoch": 1.5504373177842565, "grad_norm": 0.11494413763284683, "learning_rate": 1e-06, "loss": 0.0197, "step": 123 }, { "clip_ratio/high_max": 0.0025432173133594915, "clip_ratio/high_mean": 0.001221496258949628, "clip_ratio/low_mean": 0.0014564773591700941, "clip_ratio/low_min": 0.00016185795720957685, "clip_ratio/region_mean": 0.0026779736144817434, "epoch": 1.5597667638483965, "grad_norm": 0.12496867775917053, "learning_rate": 1e-06, "loss": -0.0414, "step": 124 }, { "clip_ratio/high_max": 0.0027226682868786156, "clip_ratio/high_mean": 0.0011761972891690675, "clip_ratio/low_mean": 0.001594664470758289, "clip_ratio/low_min": 0.0003164536065014545, "clip_ratio/region_mean": 0.002770861829048954, "epoch": 1.5690962099125363, "grad_norm": 0.12851716578006744, "learning_rate": 1e-06, "loss": 0.0164, "step": 125 }, { "clip_ratio/high_max": 0.002813838582369499, "clip_ratio/high_mean": 0.0013134839355188888, "clip_ratio/low_mean": 0.001403712642058963, "clip_ratio/low_min": 0.00017359862067678478, "clip_ratio/region_mean": 0.0027171965411980636, "epoch": 1.5784256559766763, "grad_norm": 0.12595194578170776, "learning_rate": 1e-06, "loss": -0.0183, "step": 126 }, { "clip_ratio/high_max": 0.002468951170158107, "clip_ratio/high_mean": 0.0011238552433496807, "clip_ratio/low_mean": 0.0013457736095006112, "clip_ratio/low_min": 0.00025545547032379545, "clip_ratio/region_mean": 0.002469628867402207, "epoch": 1.5877551020408163, "grad_norm": 0.12416993081569672, "learning_rate": 1e-06, "loss": 0.0003, "step": 127 }, { "clip_ratio/high_max": 0.002700058714253828, "clip_ratio/high_mean": 0.001289540250581922, "clip_ratio/low_mean": 0.0016328505371348, "clip_ratio/low_min": 0.0003622167514549801, "clip_ratio/region_mean": 0.002922390791354701, "epoch": 1.5970845481049563, "grad_norm": 0.14150327444076538, "learning_rate": 1e-06, "loss": 0.0272, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016688755580357095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 621.1257934570312, "completions/mean_terminated_length": 562.1502075195312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 2.00932944606414, "grad_norm": 0.117433562874794, "learning_rate": 1e-06, "loss": 0.0143, "num_tokens": 111420498.0, "reward": 0.5467703938484192, "reward_std": 0.22544994950294495, "rewards/simpleverify_reward/mean": 0.5467703938484192, "rewards/simpleverify_reward/std": 0.49781206250190735, "step": 129 }, { "clip_ratio/high_max": 0.002604475354019087, "clip_ratio/high_mean": 0.0010097449339809828, "clip_ratio/low_mean": 0.0006020423088557436, "clip_ratio/low_min": 5.247698936727829e-05, "clip_ratio/region_mean": 0.001611787243746221, "epoch": 2.01865889212828, "grad_norm": 0.13862793147563934, "learning_rate": 1e-06, "loss": 0.0176, "step": 130 }, { "clip_ratio/high_max": 0.00264801752200583, "clip_ratio/high_mean": 0.0010852071100089233, "clip_ratio/low_mean": 0.0007682156174269039, "clip_ratio/low_min": 8.871639238350326e-05, "clip_ratio/region_mean": 0.001853422712883912, "epoch": 2.02798833819242, "grad_norm": 0.13071204721927643, "learning_rate": 1e-06, "loss": 0.0274, "step": 131 }, { "clip_ratio/high_max": 0.002122259458701592, "clip_ratio/high_mean": 0.0010517163027543575, "clip_ratio/low_mean": 0.0007108729969331762, "clip_ratio/low_min": 7.806723351677647e-05, "clip_ratio/region_mean": 0.0017625892942305654, "epoch": 2.03731778425656, "grad_norm": 0.12775462865829468, "learning_rate": 1e-06, "loss": -0.002, "step": 132 }, { "clip_ratio/high_max": 0.0023499272392655257, "clip_ratio/high_mean": 0.0011211607252334943, "clip_ratio/low_mean": 0.0006883806290716166, "clip_ratio/low_min": 0.00010209465199295664, "clip_ratio/region_mean": 0.0018095413834089413, "epoch": 2.0466472303206995, "grad_norm": 0.1324433982372284, "learning_rate": 1e-06, "loss": -0.0079, "step": 133 }, { "clip_ratio/high_max": 0.0022856025461805984, "clip_ratio/high_mean": 0.0010311819933122024, "clip_ratio/low_mean": 0.0008853242634359049, "clip_ratio/low_min": 3.5076616768492386e-05, "clip_ratio/region_mean": 0.001916506233101245, "epoch": 2.0559766763848395, "grad_norm": 0.11901295185089111, "learning_rate": 1e-06, "loss": 0.0205, "step": 134 }, { "clip_ratio/high_max": 0.0026263585314154625, "clip_ratio/high_mean": 0.0009817987520364113, "clip_ratio/low_mean": 0.0007800115417921916, "clip_ratio/low_min": 2.192982537962962e-05, "clip_ratio/region_mean": 0.0017618103011045605, "epoch": 2.0653061224489795, "grad_norm": 0.11027567833662033, "learning_rate": 1e-06, "loss": -0.0069, "step": 135 }, { "clip_ratio/high_max": 0.0024101691669784486, "clip_ratio/high_mean": 0.0011272243682469707, "clip_ratio/low_mean": 0.0010336852465115953, "clip_ratio/low_min": 0.00010650556123437127, "clip_ratio/region_mean": 0.0021609096438623965, "epoch": 2.0746355685131195, "grad_norm": 0.1345003843307495, "learning_rate": 1e-06, "loss": 0.0049, "step": 136 }, { "clip_ratio/high_max": 0.002551914469222538, "clip_ratio/high_mean": 0.0011151861217513215, "clip_ratio/low_mean": 0.0009470010681980057, "clip_ratio/low_min": 8.230858020397136e-05, "clip_ratio/region_mean": 0.00206218718085438, "epoch": 2.0839650145772595, "grad_norm": 0.12139299511909485, "learning_rate": 1e-06, "loss": -0.0005, "step": 137 }, { "clip_ratio/high_max": 0.0024498684288118966, "clip_ratio/high_mean": 0.0011389465907996055, "clip_ratio/low_mean": 0.0008113535404845607, "clip_ratio/low_min": 7.063543580443365e-05, "clip_ratio/region_mean": 0.001950300153112039, "epoch": 2.0932944606413995, "grad_norm": 0.12825439870357513, "learning_rate": 1e-06, "loss": -0.0274, "step": 138 }, { "clip_ratio/high_max": 0.0026781300475704484, "clip_ratio/high_mean": 0.0011479368913569488, "clip_ratio/low_mean": 0.0009903935588226886, "clip_ratio/low_min": 9.716414569993503e-05, "clip_ratio/region_mean": 0.002138330506568309, "epoch": 2.1026239067055394, "grad_norm": 0.12423088401556015, "learning_rate": 1e-06, "loss": 0.0035, "step": 139 }, { "clip_ratio/high_max": 0.002209495934948791, "clip_ratio/high_mean": 0.001039613678585738, "clip_ratio/low_mean": 0.000837922811115277, "clip_ratio/low_min": 7.025053128018044e-05, "clip_ratio/region_mean": 0.001877536516985856, "epoch": 2.1119533527696794, "grad_norm": 0.11786050349473953, "learning_rate": 1e-06, "loss": 0.0093, "step": 140 }, { "clip_ratio/high_max": 0.002277497696923092, "clip_ratio/high_mean": 0.0010755934345070273, "clip_ratio/low_mean": 0.0009602748850738863, "clip_ratio/low_min": 0.00013899358418711927, "clip_ratio/region_mean": 0.002035868397797458, "epoch": 2.1212827988338194, "grad_norm": 0.10998106747865677, "learning_rate": 1e-06, "loss": 0.0057, "step": 141 }, { "clip_ratio/high_max": 0.0026260833037667908, "clip_ratio/high_mean": 0.0011495222570374608, "clip_ratio/low_mean": 0.0007736221250524977, "clip_ratio/low_min": 2.812830098264385e-05, "clip_ratio/region_mean": 0.0019231443584430963, "epoch": 2.130612244897959, "grad_norm": 0.11774852871894836, "learning_rate": 1e-06, "loss": -0.0475, "step": 142 }, { "clip_ratio/high_max": 0.002484799741068855, "clip_ratio/high_mean": 0.001073298950359458, "clip_ratio/low_mean": 0.000964514612860512, "clip_ratio/low_min": 8.693009112903383e-05, "clip_ratio/region_mean": 0.0020378135741339065, "epoch": 2.139941690962099, "grad_norm": 0.11981480568647385, "learning_rate": 1e-06, "loss": -0.0008, "step": 143 }, { "clip_ratio/high_max": 0.0021002113207941875, "clip_ratio/high_mean": 0.0008897184779925738, "clip_ratio/low_mean": 0.0009667385347711388, "clip_ratio/low_min": 0.00014040692803973798, "clip_ratio/region_mean": 0.0018564570636954159, "epoch": 2.149271137026239, "grad_norm": 0.11887800693511963, "learning_rate": 1e-06, "loss": 0.0132, "step": 144 }, { "clip_ratio/high_max": 0.0024115627675200813, "clip_ratio/high_mean": 0.0010114257602253929, "clip_ratio/low_mean": 0.0009929460829880554, "clip_ratio/low_min": 7.398496745736338e-05, "clip_ratio/region_mean": 0.002004371846851427, "epoch": 2.158600583090379, "grad_norm": 0.11972886323928833, "learning_rate": 1e-06, "loss": 0.0076, "step": 145 }, { "clip_ratio/high_max": 0.0022257216442085337, "clip_ratio/high_mean": 0.0009576232823746977, "clip_ratio/low_mean": 0.0009820960804063361, "clip_ratio/low_min": 0.0001672759808570845, "clip_ratio/region_mean": 0.0019397193609620444, "epoch": 2.167930029154519, "grad_norm": 0.13610480725765228, "learning_rate": 1e-06, "loss": 0.0396, "step": 146 }, { "clip_ratio/high_max": 0.002450355350447353, "clip_ratio/high_mean": 0.001083731036487734, "clip_ratio/low_mean": 0.0010364038644183893, "clip_ratio/low_min": 7.660711708012968e-05, "clip_ratio/region_mean": 0.0021201349372859113, "epoch": 2.177259475218659, "grad_norm": 0.11820273846387863, "learning_rate": 1e-06, "loss": 0.0086, "step": 147 }, { "clip_ratio/high_max": 0.002981394987727981, "clip_ratio/high_mean": 0.0012385466507112142, "clip_ratio/low_mean": 0.0008997840905067278, "clip_ratio/low_min": 2.615828543639509e-05, "clip_ratio/region_mean": 0.0021383307830546983, "epoch": 2.186588921282799, "grad_norm": 0.12196576595306396, "learning_rate": 1e-06, "loss": -0.0092, "step": 148 }, { "clip_ratio/high_max": 0.002453926943417173, "clip_ratio/high_mean": 0.0010992179522872902, "clip_ratio/low_mean": 0.0010282481543981703, "clip_ratio/low_min": 2.8204158297739923e-05, "clip_ratio/region_mean": 0.0021274661121424288, "epoch": 2.195918367346939, "grad_norm": 0.11932481080293655, "learning_rate": 1e-06, "loss": -0.015, "step": 149 }, { "clip_ratio/high_max": 0.0022341832373058423, "clip_ratio/high_mean": 0.0010397633450338617, "clip_ratio/low_mean": 0.00110031434087432, "clip_ratio/low_min": 0.00010811330230353633, "clip_ratio/region_mean": 0.0021400777040980756, "epoch": 2.205247813411079, "grad_norm": 0.13031426072120667, "learning_rate": 1e-06, "loss": 0.0142, "step": 150 }, { "clip_ratio/high_max": 0.002100627498293761, "clip_ratio/high_mean": 0.0009728965815156698, "clip_ratio/low_mean": 0.001109271001041634, "clip_ratio/low_min": 0.00018426047336106421, "clip_ratio/region_mean": 0.0020821675861952826, "epoch": 2.2145772594752184, "grad_norm": 0.1201627254486084, "learning_rate": 1e-06, "loss": 0.0334, "step": 151 }, { "clip_ratio/high_max": 0.002272098499815911, "clip_ratio/high_mean": 0.0009493880716036074, "clip_ratio/low_mean": 0.0012250771833350882, "clip_ratio/low_min": 0.0001314482269663131, "clip_ratio/region_mean": 0.00217446521128295, "epoch": 2.2239067055393584, "grad_norm": 0.11559823900461197, "learning_rate": 1e-06, "loss": 0.0049, "step": 152 }, { "clip_ratio/high_max": 0.0023405511310556903, "clip_ratio/high_mean": 0.0010975574332405813, "clip_ratio/low_mean": 0.0011285115579084959, "clip_ratio/low_min": 0.00010950531759590376, "clip_ratio/region_mean": 0.0022260689511313103, "epoch": 2.2332361516034984, "grad_norm": 0.130247563123703, "learning_rate": 1e-06, "loss": 0.004, "step": 153 }, { "clip_ratio/high_max": 0.0025937355821952224, "clip_ratio/high_mean": 0.0010923339614237193, "clip_ratio/low_mean": 0.001415761093085166, "clip_ratio/low_min": 0.0001920558606798295, "clip_ratio/region_mean": 0.002508095058146864, "epoch": 2.2425655976676384, "grad_norm": 0.12192545086145401, "learning_rate": 1e-06, "loss": 0.0625, "step": 154 }, { "clip_ratio/high_max": 0.002497716595826205, "clip_ratio/high_mean": 0.0011746687414415646, "clip_ratio/low_mean": 0.0012749092384183314, "clip_ratio/low_min": 0.0001659430708969012, "clip_ratio/region_mean": 0.0024495779653079808, "epoch": 2.2518950437317784, "grad_norm": 0.1451082080602646, "learning_rate": 1e-06, "loss": 0.0093, "step": 155 }, { "clip_ratio/high_max": 0.002519707406463567, "clip_ratio/high_mean": 0.0011039472628908698, "clip_ratio/low_mean": 0.0011554806114872918, "clip_ratio/low_min": 6.777406906621763e-05, "clip_ratio/region_mean": 0.0022594279143959284, "epoch": 2.2612244897959184, "grad_norm": 0.12295163422822952, "learning_rate": 1e-06, "loss": -0.0061, "step": 156 }, { "clip_ratio/high_max": 0.0024124329356709495, "clip_ratio/high_mean": 0.0010858879431907553, "clip_ratio/low_mean": 0.0011228975854464807, "clip_ratio/low_min": 0.00012370092008495703, "clip_ratio/region_mean": 0.00220878554682713, "epoch": 2.2705539358600584, "grad_norm": 0.12332891672849655, "learning_rate": 1e-06, "loss": -0.0336, "step": 157 }, { "clip_ratio/high_max": 0.0023857227715780027, "clip_ratio/high_mean": 0.0010250295272271615, "clip_ratio/low_mean": 0.0011704927565006074, "clip_ratio/low_min": 0.0001437042319594184, "clip_ratio/region_mean": 0.002195522283727769, "epoch": 2.2798833819241984, "grad_norm": 0.12864260375499725, "learning_rate": 1e-06, "loss": 0.0402, "step": 158 }, { "clip_ratio/high_max": 0.0025151486552204005, "clip_ratio/high_mean": 0.0011685123172355816, "clip_ratio/low_mean": 0.0012465614381653722, "clip_ratio/low_min": 0.00018614440705277957, "clip_ratio/region_mean": 0.002415073788142763, "epoch": 2.2892128279883384, "grad_norm": 0.12569065392017365, "learning_rate": 1e-06, "loss": 0.0289, "step": 159 }, { "clip_ratio/high_max": 0.0025789209248614497, "clip_ratio/high_mean": 0.001108487436795258, "clip_ratio/low_mean": 0.0011688708073052112, "clip_ratio/low_min": 0.00015057267228257842, "clip_ratio/region_mean": 0.002277358216815628, "epoch": 2.298542274052478, "grad_norm": 0.12745057046413422, "learning_rate": 1e-06, "loss": -0.012, "step": 160 }, { "clip_ratio/high_max": 0.0026628526757122017, "clip_ratio/high_mean": 0.0010252606225549243, "clip_ratio/low_mean": 0.0010789855259645265, "clip_ratio/low_min": 0.00017235779523616657, "clip_ratio/region_mean": 0.0021042461157776415, "epoch": 2.307871720116618, "grad_norm": 0.11011394113302231, "learning_rate": 1e-06, "loss": -0.0114, "step": 161 }, { "clip_ratio/high_max": 0.0024281475925818086, "clip_ratio/high_mean": 0.0011097267633886077, "clip_ratio/low_mean": 0.001112452879169723, "clip_ratio/low_min": 7.253573585330741e-05, "clip_ratio/region_mean": 0.0022221796316443942, "epoch": 2.317201166180758, "grad_norm": 0.11440815031528473, "learning_rate": 1e-06, "loss": 0.0021, "step": 162 }, { "clip_ratio/high_max": 0.0027596873696893454, "clip_ratio/high_mean": 0.0012952804318047129, "clip_ratio/low_mean": 0.001032649219268933, "clip_ratio/low_min": 8.360037463717163e-05, "clip_ratio/region_mean": 0.0023279297311091796, "epoch": 2.326530612244898, "grad_norm": 0.13623954355716705, "learning_rate": 1e-06, "loss": -0.0408, "step": 163 }, { "clip_ratio/high_max": 0.0025571161750121973, "clip_ratio/high_mean": 0.0011863762665598188, "clip_ratio/low_mean": 0.0010855233413167298, "clip_ratio/low_min": 0.00010149566878681071, "clip_ratio/region_mean": 0.0022718996115145274, "epoch": 2.335860058309038, "grad_norm": 0.12146523594856262, "learning_rate": 1e-06, "loss": -0.009, "step": 164 }, { "clip_ratio/high_max": 0.0023731513720122166, "clip_ratio/high_mean": 0.0010484574231668375, "clip_ratio/low_mean": 0.0011893968248841702, "clip_ratio/low_min": 0.00021224252031970536, "clip_ratio/region_mean": 0.002237854241684545, "epoch": 2.345189504373178, "grad_norm": 0.12812286615371704, "learning_rate": 1e-06, "loss": 0.0263, "step": 165 }, { "clip_ratio/high_max": 0.002815638297761325, "clip_ratio/high_mean": 0.0012737084653053898, "clip_ratio/low_mean": 0.001155774309154367, "clip_ratio/low_min": 8.2797123468481e-05, "clip_ratio/region_mean": 0.00242948282539146, "epoch": 2.354518950437318, "grad_norm": 0.14721983671188354, "learning_rate": 1e-06, "loss": 0.0212, "step": 166 }, { "clip_ratio/high_max": 0.0026847347908187658, "clip_ratio/high_mean": 0.0011205359696759842, "clip_ratio/low_mean": 0.0013116085028741509, "clip_ratio/low_min": 0.00010896177991526201, "clip_ratio/region_mean": 0.002432144472550135, "epoch": 2.363848396501458, "grad_norm": 0.13157665729522705, "learning_rate": 1e-06, "loss": 0.0157, "step": 167 }, { "clip_ratio/high_max": 0.0032510058663319796, "clip_ratio/high_mean": 0.0013470837984641548, "clip_ratio/low_mean": 0.0011965052653977182, "clip_ratio/low_min": 0.00017041550654539606, "clip_ratio/region_mean": 0.0025435890711378306, "epoch": 2.373177842565598, "grad_norm": 0.12876376509666443, "learning_rate": 1e-06, "loss": -0.0324, "step": 168 }, { "clip_ratio/high_max": 0.002307305403519422, "clip_ratio/high_mean": 0.001048291931510903, "clip_ratio/low_mean": 0.001503323615906993, "clip_ratio/low_min": 0.0001809802015486639, "clip_ratio/region_mean": 0.002551615543779917, "epoch": 2.3825072886297374, "grad_norm": 0.11819365620613098, "learning_rate": 1e-06, "loss": 0.0003, "step": 169 }, { "clip_ratio/high_max": 0.0024908224149839953, "clip_ratio/high_mean": 0.001245201681740582, "clip_ratio/low_mean": 0.0011851935705635697, "clip_ratio/low_min": 6.285757990553975e-05, "clip_ratio/region_mean": 0.002430395245028194, "epoch": 2.3918367346938774, "grad_norm": 0.1131335124373436, "learning_rate": 1e-06, "loss": -0.0308, "step": 170 }, { "clip_ratio/high_max": 0.00256960164551856, "clip_ratio/high_mean": 0.0012161276208644267, "clip_ratio/low_mean": 0.0012400449450069573, "clip_ratio/low_min": 0.0001294164349019411, "clip_ratio/region_mean": 0.0024561725804232992, "epoch": 2.4011661807580174, "grad_norm": 0.13580158352851868, "learning_rate": 1e-06, "loss": -0.0075, "step": 171 }, { "clip_ratio/high_max": 0.0027758334763348103, "clip_ratio/high_mean": 0.001231379010278033, "clip_ratio/low_mean": 0.0015014320160844363, "clip_ratio/low_min": 0.00014884592201269697, "clip_ratio/region_mean": 0.002732811088208109, "epoch": 2.4104956268221573, "grad_norm": 0.12529368698596954, "learning_rate": 1e-06, "loss": 0.0067, "step": 172 }, { "clip_ratio/high_max": 0.002594049474282656, "clip_ratio/high_mean": 0.0012897534725198057, "clip_ratio/low_mean": 0.0014303950920293573, "clip_ratio/low_min": 0.00011217205792490859, "clip_ratio/region_mean": 0.0027201485718251206, "epoch": 2.4198250728862973, "grad_norm": 0.12434151768684387, "learning_rate": 1e-06, "loss": 0.021, "step": 173 }, { "clip_ratio/high_max": 0.002626042078190949, "clip_ratio/high_mean": 0.001195309447211912, "clip_ratio/low_mean": 0.0013356239687709603, "clip_ratio/low_min": 0.00014563699096470373, "clip_ratio/region_mean": 0.0025309334087069146, "epoch": 2.4291545189504373, "grad_norm": 0.12320226430892944, "learning_rate": 1e-06, "loss": -0.0107, "step": 174 }, { "clip_ratio/high_max": 0.0025353476012242027, "clip_ratio/high_mean": 0.0011494746577227488, "clip_ratio/low_mean": 0.0012201142781123053, "clip_ratio/low_min": 0.000194438808648556, "clip_ratio/region_mean": 0.0023695889103692025, "epoch": 2.4384839650145773, "grad_norm": 0.12133309245109558, "learning_rate": 1e-06, "loss": 0.0077, "step": 175 }, { "clip_ratio/high_max": 0.002551100442360621, "clip_ratio/high_mean": 0.0011013783114321996, "clip_ratio/low_mean": 0.001278181738598505, "clip_ratio/low_min": 0.00016299128856189782, "clip_ratio/region_mean": 0.0023795600136509165, "epoch": 2.4478134110787173, "grad_norm": 0.11697214841842651, "learning_rate": 1e-06, "loss": -0.011, "step": 176 }, { "clip_ratio/high_max": 0.002325334346096497, "clip_ratio/high_mean": 0.0011147658078698441, "clip_ratio/low_mean": 0.0010799937226693146, "clip_ratio/low_min": 4.906066351395566e-05, "clip_ratio/region_mean": 0.0021947595305391587, "epoch": 2.4571428571428573, "grad_norm": 0.12206520885229111, "learning_rate": 1e-06, "loss": -0.011, "step": 177 }, { "clip_ratio/high_max": 0.002367674111155793, "clip_ratio/high_mean": 0.001087657799871522, "clip_ratio/low_mean": 0.0012481089543143753, "clip_ratio/low_min": 0.0002468786715326132, "clip_ratio/region_mean": 0.002335766730539035, "epoch": 2.466472303206997, "grad_norm": 0.12972880899906158, "learning_rate": 1e-06, "loss": 0.0494, "step": 178 }, { "clip_ratio/high_max": 0.0028937013885297347, "clip_ratio/high_mean": 0.0012001742725260556, "clip_ratio/low_mean": 0.0011895055395143572, "clip_ratio/low_min": 0.00017757929708750453, "clip_ratio/region_mean": 0.0023896798520581797, "epoch": 2.4758017492711373, "grad_norm": 0.12528245151042938, "learning_rate": 1e-06, "loss": 0.0275, "step": 179 }, { "clip_ratio/high_max": 0.002909040755184833, "clip_ratio/high_mean": 0.0012443441919458564, "clip_ratio/low_mean": 0.0010223880271951202, "clip_ratio/low_min": 7.617511073476635e-05, "clip_ratio/region_mean": 0.00226673227007268, "epoch": 2.485131195335277, "grad_norm": 0.11673883348703384, "learning_rate": 1e-06, "loss": -0.0137, "step": 180 }, { "clip_ratio/high_max": 0.0027894804734387435, "clip_ratio/high_mean": 0.0013034097573836334, "clip_ratio/low_mean": 0.0012337905136519112, "clip_ratio/low_min": 0.00013576465244113933, "clip_ratio/region_mean": 0.002537200241931714, "epoch": 2.494460641399417, "grad_norm": 0.12992659211158752, "learning_rate": 1e-06, "loss": 0.0102, "step": 181 }, { "clip_ratio/high_max": 0.0029747471562586725, "clip_ratio/high_mean": 0.0013314384959812742, "clip_ratio/low_mean": 0.001018367001961451, "clip_ratio/low_min": 8.735746359889163e-05, "clip_ratio/region_mean": 0.0023498054724768735, "epoch": 2.503790087463557, "grad_norm": 0.12337908893823624, "learning_rate": 1e-06, "loss": -0.0315, "step": 182 }, { "clip_ratio/high_max": 0.002388678018178325, "clip_ratio/high_mean": 0.000977828683971893, "clip_ratio/low_mean": 0.0012462238591979258, "clip_ratio/low_min": 0.00012972266449651215, "clip_ratio/region_mean": 0.0022240525649976917, "epoch": 2.513119533527697, "grad_norm": 0.1161714568734169, "learning_rate": 1e-06, "loss": -0.0029, "step": 183 }, { "clip_ratio/high_max": 0.0027139371595694683, "clip_ratio/high_mean": 0.0011963951837969944, "clip_ratio/low_mean": 0.0014414133238460636, "clip_ratio/low_min": 0.0002481808433003607, "clip_ratio/region_mean": 0.0026378084148745984, "epoch": 2.522448979591837, "grad_norm": 0.12418004870414734, "learning_rate": 1e-06, "loss": 0.0223, "step": 184 }, { "clip_ratio/high_max": 0.002537755797675345, "clip_ratio/high_mean": 0.0011132990730402526, "clip_ratio/low_mean": 0.0012987907139176968, "clip_ratio/low_min": 0.00025258928053517593, "clip_ratio/region_mean": 0.0024120898087858222, "epoch": 2.5317784256559768, "grad_norm": 0.10932054370641708, "learning_rate": 1e-06, "loss": 0.0057, "step": 185 }, { "clip_ratio/high_max": 0.0025301558707724325, "clip_ratio/high_mean": 0.0010961110274365637, "clip_ratio/low_mean": 0.0012796354512829566, "clip_ratio/low_min": 9.25321837712545e-05, "clip_ratio/region_mean": 0.00237574648053851, "epoch": 2.5411078717201168, "grad_norm": 0.11127616465091705, "learning_rate": 1e-06, "loss": 0.0173, "step": 186 }, { "clip_ratio/high_max": 0.002816690379404463, "clip_ratio/high_mean": 0.00122662079957081, "clip_ratio/low_mean": 0.0013554430515796412, "clip_ratio/low_min": 0.00013329072771739447, "clip_ratio/region_mean": 0.0025820638693403453, "epoch": 2.5504373177842563, "grad_norm": 0.12166678160429001, "learning_rate": 1e-06, "loss": -0.0123, "step": 187 }, { "clip_ratio/high_max": 0.0027079341089120135, "clip_ratio/high_mean": 0.0012754563576891087, "clip_ratio/low_mean": 0.0013666823324456345, "clip_ratio/low_min": 0.00017096808824135223, "clip_ratio/region_mean": 0.002642138693772722, "epoch": 2.5597667638483967, "grad_norm": 0.12692616879940033, "learning_rate": 1e-06, "loss": -0.0342, "step": 188 }, { "clip_ratio/high_max": 0.002358461577387061, "clip_ratio/high_mean": 0.0011824131706816843, "clip_ratio/low_mean": 0.0011677085858536884, "clip_ratio/low_min": 7.566150998172816e-05, "clip_ratio/region_mean": 0.0023501217874581926, "epoch": 2.5690962099125363, "grad_norm": 0.12305018305778503, "learning_rate": 1e-06, "loss": -0.032, "step": 189 }, { "clip_ratio/high_max": 0.0023616322796442546, "clip_ratio/high_mean": 0.0011595011492318008, "clip_ratio/low_mean": 0.001299076937357313, "clip_ratio/low_min": 0.00024115154883475043, "clip_ratio/region_mean": 0.002458578055666294, "epoch": 2.5784256559766763, "grad_norm": 0.1243428960442543, "learning_rate": 1e-06, "loss": 0.0027, "step": 190 }, { "clip_ratio/high_max": 0.0028254646531422623, "clip_ratio/high_mean": 0.001123283529523178, "clip_ratio/low_mean": 0.001313371496507898, "clip_ratio/low_min": 0.00016924353076319676, "clip_ratio/region_mean": 0.0024366549914702773, "epoch": 2.5877551020408163, "grad_norm": 0.12276402860879898, "learning_rate": 1e-06, "loss": 0.0246, "step": 191 }, { "clip_ratio/high_max": 0.002761791678494774, "clip_ratio/high_mean": 0.0012255481524334755, "clip_ratio/low_mean": 0.0012073713296558708, "clip_ratio/low_min": 0.0001706942430246272, "clip_ratio/region_mean": 0.002432919565762859, "epoch": 2.5970845481049563, "grad_norm": 0.12500406801700592, "learning_rate": 1e-06, "loss": -0.0294, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020106724330357095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 634.357177734375, "completions/mean_terminated_length": 563.32666015625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 3.00932944606414, "grad_norm": 0.14026591181755066, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 148897704.0, "reward": 0.5502232313156128, "reward_std": 0.21750041842460632, "rewards/simpleverify_reward/mean": 0.5502232313156128, "rewards/simpleverify_reward/std": 0.49747559428215027, "step": 193 }, { "clip_ratio/high_max": 0.002061748346022796, "clip_ratio/high_mean": 0.000871242256835103, "clip_ratio/low_mean": 0.0006570765872311313, "clip_ratio/low_min": 4.9448425670561846e-05, "clip_ratio/region_mean": 0.0015283188258763403, "epoch": 3.01865889212828, "grad_norm": 0.1298675686120987, "learning_rate": 1e-06, "loss": 0.069, "step": 194 }, { "clip_ratio/high_max": 0.0021520642694667913, "clip_ratio/high_mean": 0.0009898013740894385, "clip_ratio/low_mean": 0.0005682344035449205, "clip_ratio/low_min": 2.4069012397376355e-05, "clip_ratio/region_mean": 0.0015580357830913272, "epoch": 3.02798833819242, "grad_norm": 0.12610502541065216, "learning_rate": 1e-06, "loss": -0.0041, "step": 195 }, { "clip_ratio/high_max": 0.0024360017268918455, "clip_ratio/high_mean": 0.0010614840466587339, "clip_ratio/low_mean": 0.0006560986976182903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017175827670143917, "epoch": 3.03731778425656, "grad_norm": 0.1263195127248764, "learning_rate": 1e-06, "loss": -0.0162, "step": 196 }, { "clip_ratio/high_max": 0.0024873623697203584, "clip_ratio/high_mean": 0.0010059822707262356, "clip_ratio/low_mean": 0.0006211861041265365, "clip_ratio/low_min": 5.088131820230046e-05, "clip_ratio/region_mean": 0.0016271683998638764, "epoch": 3.0466472303206995, "grad_norm": 0.12883612513542175, "learning_rate": 1e-06, "loss": 0.0115, "step": 197 }, { "clip_ratio/high_max": 0.0022831738024251536, "clip_ratio/high_mean": 0.0008958075559348799, "clip_ratio/low_mean": 0.0007660653791390359, "clip_ratio/low_min": 8.209856878238497e-05, "clip_ratio/region_mean": 0.0016618729350739159, "epoch": 3.0559766763848395, "grad_norm": 0.12781526148319244, "learning_rate": 1e-06, "loss": 0.1007, "step": 198 }, { "clip_ratio/high_max": 0.002296483340614941, "clip_ratio/high_mean": 0.0009581223021086771, "clip_ratio/low_mean": 0.0008079199051280739, "clip_ratio/low_min": 7.637055568920914e-05, "clip_ratio/region_mean": 0.0017660422236076556, "epoch": 3.0653061224489795, "grad_norm": 0.11599215865135193, "learning_rate": 1e-06, "loss": 0.0101, "step": 199 }, { "clip_ratio/high_max": 0.0023743426500004716, "clip_ratio/high_mean": 0.001073000101314392, "clip_ratio/low_mean": 0.000863245242726407, "clip_ratio/low_min": 5.384240830608178e-05, "clip_ratio/region_mean": 0.0019362453313078731, "epoch": 3.0746355685131195, "grad_norm": 0.1352672278881073, "learning_rate": 1e-06, "loss": 0.0064, "step": 200 }, { "clip_ratio/high_max": 0.002242562113679014, "clip_ratio/high_mean": 0.0009041177800099831, "clip_ratio/low_mean": 0.0008844915282679722, "clip_ratio/low_min": 7.005702718743123e-05, "clip_ratio/region_mean": 0.0017886093191918917, "epoch": 3.0839650145772595, "grad_norm": 0.11848468333482742, "learning_rate": 1e-06, "loss": 0.0065, "step": 201 }, { "clip_ratio/high_max": 0.0027339655498508364, "clip_ratio/high_mean": 0.0011076306145696435, "clip_ratio/low_mean": 0.0009033701608132105, "clip_ratio/low_min": 4.685640669777058e-05, "clip_ratio/region_mean": 0.002011000833590515, "epoch": 3.0932944606413995, "grad_norm": 0.11213834583759308, "learning_rate": 1e-06, "loss": -0.014, "step": 202 }, { "clip_ratio/high_max": 0.0023162214856711216, "clip_ratio/high_mean": 0.001049979320669081, "clip_ratio/low_mean": 0.0010175596817134647, "clip_ratio/low_min": 0.00015259493011399172, "clip_ratio/region_mean": 0.0020675390187534504, "epoch": 3.1026239067055394, "grad_norm": 0.12015374004840851, "learning_rate": 1e-06, "loss": 0.0422, "step": 203 }, { "clip_ratio/high_max": 0.0027470207278383896, "clip_ratio/high_mean": 0.001183155553007964, "clip_ratio/low_mean": 0.0008524330405634828, "clip_ratio/low_min": 6.070850304240594e-05, "clip_ratio/region_mean": 0.002035588629951235, "epoch": 3.1119533527696794, "grad_norm": 0.12526558339595795, "learning_rate": 1e-06, "loss": 0.0043, "step": 204 }, { "clip_ratio/high_max": 0.002381809303187765, "clip_ratio/high_mean": 0.0009171813799184747, "clip_ratio/low_mean": 0.0010008566132455599, "clip_ratio/low_min": 0.00011786398226831807, "clip_ratio/region_mean": 0.0019180379385943525, "epoch": 3.1212827988338194, "grad_norm": 0.11298627406358719, "learning_rate": 1e-06, "loss": 0.0409, "step": 205 }, { "clip_ratio/high_max": 0.0023509277816629037, "clip_ratio/high_mean": 0.0010854686042875983, "clip_ratio/low_mean": 0.0010403982960269786, "clip_ratio/low_min": 0.0001352437393506989, "clip_ratio/region_mean": 0.0021258669075905345, "epoch": 3.130612244897959, "grad_norm": 0.11448032408952713, "learning_rate": 1e-06, "loss": 0.0028, "step": 206 }, { "clip_ratio/high_max": 0.002645508779096417, "clip_ratio/high_mean": 0.0011063714482588693, "clip_ratio/low_mean": 0.0009564560987200821, "clip_ratio/low_min": 0.00013615141688205767, "clip_ratio/region_mean": 0.0020628275960916653, "epoch": 3.139941690962099, "grad_norm": 0.1367618441581726, "learning_rate": 1e-06, "loss": 0.0169, "step": 207 }, { "clip_ratio/high_max": 0.0025479232353973202, "clip_ratio/high_mean": 0.0012087199647794478, "clip_ratio/low_mean": 0.0009755447499628644, "clip_ratio/low_min": 0.00010132720126421191, "clip_ratio/region_mean": 0.0021842647329322062, "epoch": 3.149271137026239, "grad_norm": 0.1251148134469986, "learning_rate": 1e-06, "loss": -0.0055, "step": 208 }, { "clip_ratio/high_max": 0.0026119275571545586, "clip_ratio/high_mean": 0.0011381316871847957, "clip_ratio/low_mean": 0.0010236974267172627, "clip_ratio/low_min": 0.00011333390102663543, "clip_ratio/region_mean": 0.0021618290993501432, "epoch": 3.158600583090379, "grad_norm": 0.11783840507268906, "learning_rate": 1e-06, "loss": -0.0065, "step": 209 }, { "clip_ratio/high_max": 0.002686173000256531, "clip_ratio/high_mean": 0.0011166190597577952, "clip_ratio/low_mean": 0.0010319768443878274, "clip_ratio/low_min": 0.00015408775379910367, "clip_ratio/region_mean": 0.0021485959077836014, "epoch": 3.167930029154519, "grad_norm": 0.12133133411407471, "learning_rate": 1e-06, "loss": -0.0118, "step": 210 }, { "clip_ratio/high_max": 0.0021286107366904616, "clip_ratio/high_mean": 0.000993017772998428, "clip_ratio/low_mean": 0.0009988674282794818, "clip_ratio/low_min": 7.460783126589376e-05, "clip_ratio/region_mean": 0.0019918852412956767, "epoch": 3.177259475218659, "grad_norm": 0.13970410823822021, "learning_rate": 1e-06, "loss": 0.0438, "step": 211 }, { "clip_ratio/high_max": 0.0022555009491043165, "clip_ratio/high_mean": 0.0011183221104147378, "clip_ratio/low_mean": 0.0009317137592006475, "clip_ratio/low_min": 8.494884968968108e-05, "clip_ratio/region_mean": 0.0020500359023571946, "epoch": 3.186588921282799, "grad_norm": 0.11689542233943939, "learning_rate": 1e-06, "loss": -0.0057, "step": 212 }, { "clip_ratio/high_max": 0.0025477413946646266, "clip_ratio/high_mean": 0.0011465173229225911, "clip_ratio/low_mean": 0.0008799154056760017, "clip_ratio/low_min": 8.384949524042895e-05, "clip_ratio/region_mean": 0.002026432746788487, "epoch": 3.195918367346939, "grad_norm": 0.12078435719013214, "learning_rate": 1e-06, "loss": -0.0247, "step": 213 }, { "clip_ratio/high_max": 0.002567364979768172, "clip_ratio/high_mean": 0.0010069786167150596, "clip_ratio/low_mean": 0.0009920025204337435, "clip_ratio/low_min": 0.00011640755474218167, "clip_ratio/region_mean": 0.00199898117716657, "epoch": 3.205247813411079, "grad_norm": 0.12187445163726807, "learning_rate": 1e-06, "loss": 0.0415, "step": 214 }, { "clip_ratio/high_max": 0.002537337342801038, "clip_ratio/high_mean": 0.0011145730495627504, "clip_ratio/low_mean": 0.0008835631269903388, "clip_ratio/low_min": 5.051165771874366e-05, "clip_ratio/region_mean": 0.0019981361292593647, "epoch": 3.2145772594752184, "grad_norm": 0.1302180290222168, "learning_rate": 1e-06, "loss": -0.0072, "step": 215 }, { "clip_ratio/high_max": 0.002595059428131208, "clip_ratio/high_mean": 0.0011486775511002634, "clip_ratio/low_mean": 0.0009252007530449191, "clip_ratio/low_min": 5.344779128790833e-05, "clip_ratio/region_mean": 0.0020738783132401295, "epoch": 3.2239067055393584, "grad_norm": 0.11370299011468887, "learning_rate": 1e-06, "loss": -0.0113, "step": 216 }, { "clip_ratio/high_max": 0.0026351970300311223, "clip_ratio/high_mean": 0.0011887289365404285, "clip_ratio/low_mean": 0.0009616375009500189, "clip_ratio/low_min": 5.4788140005257446e-05, "clip_ratio/region_mean": 0.0021503664465853944, "epoch": 3.2332361516034984, "grad_norm": 0.12059260904788971, "learning_rate": 1e-06, "loss": -0.0097, "step": 217 }, { "clip_ratio/high_max": 0.0022073932123021223, "clip_ratio/high_mean": 0.0010123799802386202, "clip_ratio/low_mean": 0.0010926084069069475, "clip_ratio/low_min": 0.00010800341442518402, "clip_ratio/region_mean": 0.002104988401697483, "epoch": 3.2425655976676384, "grad_norm": 0.1268319934606552, "learning_rate": 1e-06, "loss": 0.0427, "step": 218 }, { "clip_ratio/high_max": 0.0024583201957284473, "clip_ratio/high_mean": 0.0012200099808978848, "clip_ratio/low_mean": 0.0009574005161994137, "clip_ratio/low_min": 8.215757043217309e-05, "clip_ratio/region_mean": 0.0021774104825453833, "epoch": 3.2518950437317784, "grad_norm": 0.12496983259916306, "learning_rate": 1e-06, "loss": -0.012, "step": 219 }, { "clip_ratio/high_max": 0.002703683465369977, "clip_ratio/high_mean": 0.0011089182080468163, "clip_ratio/low_mean": 0.0011269765527686104, "clip_ratio/low_min": 9.889045850286493e-05, "clip_ratio/region_mean": 0.0022358947971952148, "epoch": 3.2612244897959184, "grad_norm": 0.12382709234952927, "learning_rate": 1e-06, "loss": 0.0077, "step": 220 }, { "clip_ratio/high_max": 0.0023699304219917394, "clip_ratio/high_mean": 0.0010214009125775192, "clip_ratio/low_mean": 0.0012914672552142292, "clip_ratio/low_min": 0.00020172488621028606, "clip_ratio/region_mean": 0.0023128682223614305, "epoch": 3.2705539358600584, "grad_norm": 0.13156366348266602, "learning_rate": 1e-06, "loss": 0.0273, "step": 221 }, { "clip_ratio/high_max": 0.0021955612755846232, "clip_ratio/high_mean": 0.001025475967253442, "clip_ratio/low_mean": 0.0012421034098224482, "clip_ratio/low_min": 0.00019581201377150137, "clip_ratio/region_mean": 0.0022675793879898265, "epoch": 3.2798833819241984, "grad_norm": 0.1161755695939064, "learning_rate": 1e-06, "loss": 0.0018, "step": 222 }, { "clip_ratio/high_max": 0.0029818642142345197, "clip_ratio/high_mean": 0.001216008182382211, "clip_ratio/low_mean": 0.001189883601909969, "clip_ratio/low_min": 0.0001064189327735221, "clip_ratio/region_mean": 0.0024058917915681377, "epoch": 3.2892128279883384, "grad_norm": 0.12272651493549347, "learning_rate": 1e-06, "loss": -0.0349, "step": 223 }, { "clip_ratio/high_max": 0.002536017753300257, "clip_ratio/high_mean": 0.0010533328058954794, "clip_ratio/low_mean": 0.0012618183500308078, "clip_ratio/low_min": 6.540621598105645e-05, "clip_ratio/region_mean": 0.0023151511340984143, "epoch": 3.298542274052478, "grad_norm": 0.12759383022785187, "learning_rate": 1e-06, "loss": -0.0036, "step": 224 }, { "clip_ratio/high_max": 0.00248715727502713, "clip_ratio/high_mean": 0.0012340993707766756, "clip_ratio/low_mean": 0.001422618228389183, "clip_ratio/low_min": 0.0003430717397350236, "clip_ratio/region_mean": 0.002656717610079795, "epoch": 3.307871720116618, "grad_norm": 0.12538866698741913, "learning_rate": 1e-06, "loss": -0.015, "step": 225 }, { "clip_ratio/high_max": 0.002855788894521538, "clip_ratio/high_mean": 0.0011752312384487595, "clip_ratio/low_mean": 0.0013954976457171142, "clip_ratio/low_min": 0.00013497537202056265, "clip_ratio/region_mean": 0.0025707288732519373, "epoch": 3.317201166180758, "grad_norm": 0.2173781841993332, "learning_rate": 1e-06, "loss": 0.0119, "step": 226 }, { "clip_ratio/high_max": 0.0023392430739477277, "clip_ratio/high_mean": 0.001059344613167923, "clip_ratio/low_mean": 0.0013196719337429386, "clip_ratio/low_min": 0.00021052706506452523, "clip_ratio/region_mean": 0.0023790165141690522, "epoch": 3.326530612244898, "grad_norm": 0.12303262948989868, "learning_rate": 1e-06, "loss": 0.0181, "step": 227 }, { "clip_ratio/high_max": 0.002634392280015163, "clip_ratio/high_mean": 0.0011596684671530966, "clip_ratio/low_mean": 0.001220979800564237, "clip_ratio/low_min": 0.00014960934277041815, "clip_ratio/region_mean": 0.0023806482640793547, "epoch": 3.335860058309038, "grad_norm": 0.12350213527679443, "learning_rate": 1e-06, "loss": 0.0138, "step": 228 }, { "clip_ratio/high_max": 0.0027777489740401506, "clip_ratio/high_mean": 0.0012519868578237947, "clip_ratio/low_mean": 0.0010895985979004763, "clip_ratio/low_min": 0.00019499014160828665, "clip_ratio/region_mean": 0.002341585510293953, "epoch": 3.345189504373178, "grad_norm": 0.12810732424259186, "learning_rate": 1e-06, "loss": 0.0115, "step": 229 }, { "clip_ratio/high_max": 0.0023193202359834686, "clip_ratio/high_mean": 0.0009734930645208806, "clip_ratio/low_mean": 0.0009347590639663395, "clip_ratio/low_min": 9.29479674596223e-05, "clip_ratio/region_mean": 0.0019082521539530717, "epoch": 3.354518950437318, "grad_norm": 0.12010464072227478, "learning_rate": 1e-06, "loss": -0.0183, "step": 230 }, { "clip_ratio/high_max": 0.0024950333499873523, "clip_ratio/high_mean": 0.0011771378412959166, "clip_ratio/low_mean": 0.0011222091561648995, "clip_ratio/low_min": 0.00018253801681566983, "clip_ratio/region_mean": 0.002299347033840604, "epoch": 3.363848396501458, "grad_norm": 0.12164637446403503, "learning_rate": 1e-06, "loss": 0.0025, "step": 231 }, { "clip_ratio/high_max": 0.0025469671163591556, "clip_ratio/high_mean": 0.001141063494287664, "clip_ratio/low_mean": 0.0010985283624904696, "clip_ratio/low_min": 0.0001385923442285275, "clip_ratio/region_mean": 0.0022395918422262184, "epoch": 3.373177842565598, "grad_norm": 0.11820098012685776, "learning_rate": 1e-06, "loss": 0.0093, "step": 232 }, { "clip_ratio/high_max": 0.0025209588202415034, "clip_ratio/high_mean": 0.000971782978012925, "clip_ratio/low_mean": 0.0012674398058152292, "clip_ratio/low_min": 0.00016506315932929283, "clip_ratio/region_mean": 0.0022392228347598575, "epoch": 3.3825072886297374, "grad_norm": 0.1263270229101181, "learning_rate": 1e-06, "loss": 0.0484, "step": 233 }, { "clip_ratio/high_max": 0.0025999623903771862, "clip_ratio/high_mean": 0.0010336642844777089, "clip_ratio/low_mean": 0.0012233874804223888, "clip_ratio/low_min": 0.0001128667072407552, "clip_ratio/region_mean": 0.002257051761262119, "epoch": 3.3918367346938774, "grad_norm": 0.1158517450094223, "learning_rate": 1e-06, "loss": -0.0161, "step": 234 }, { "clip_ratio/high_max": 0.0027295962936477736, "clip_ratio/high_mean": 0.0011273406016698573, "clip_ratio/low_mean": 0.0012824184886994772, "clip_ratio/low_min": 5.478709499584511e-05, "clip_ratio/region_mean": 0.002409759115835186, "epoch": 3.4011661807580174, "grad_norm": 0.12060651928186417, "learning_rate": 1e-06, "loss": 0.006, "step": 235 }, { "clip_ratio/high_max": 0.002363648010941688, "clip_ratio/high_mean": 0.0010719535202952102, "clip_ratio/low_mean": 0.0012619421759154648, "clip_ratio/low_min": 0.00010520220712351147, "clip_ratio/region_mean": 0.00233389571076259, "epoch": 3.4104956268221573, "grad_norm": 0.11047002673149109, "learning_rate": 1e-06, "loss": 0.0145, "step": 236 }, { "clip_ratio/high_max": 0.0028598464632523246, "clip_ratio/high_mean": 0.0012210337044962216, "clip_ratio/low_mean": 0.0014538453215209302, "clip_ratio/low_min": 0.00013639724420499988, "clip_ratio/region_mean": 0.0026748790478450246, "epoch": 3.4198250728862973, "grad_norm": 0.12310261279344559, "learning_rate": 1e-06, "loss": -0.0097, "step": 237 }, { "clip_ratio/high_max": 0.003119135377346538, "clip_ratio/high_mean": 0.0013212138073868118, "clip_ratio/low_mean": 0.0012602354945556726, "clip_ratio/low_min": 4.994714799977373e-05, "clip_ratio/region_mean": 0.002581449312856421, "epoch": 3.4291545189504373, "grad_norm": 0.12614542245864868, "learning_rate": 1e-06, "loss": -0.0207, "step": 238 }, { "clip_ratio/high_max": 0.002636818950122688, "clip_ratio/high_mean": 0.0011825242509075906, "clip_ratio/low_mean": 0.0013021347840549424, "clip_ratio/low_min": 0.0001502478298789356, "clip_ratio/region_mean": 0.002484659016772639, "epoch": 3.4384839650145773, "grad_norm": 0.11874942481517792, "learning_rate": 1e-06, "loss": -0.0475, "step": 239 }, { "clip_ratio/high_max": 0.00306760219973512, "clip_ratio/high_mean": 0.0014245340571505949, "clip_ratio/low_mean": 0.0015436406611115672, "clip_ratio/low_min": 0.0002461949588905554, "clip_ratio/region_mean": 0.0029681747837457806, "epoch": 3.4478134110787173, "grad_norm": 0.1389058381319046, "learning_rate": 1e-06, "loss": -0.0446, "step": 240 }, { "clip_ratio/high_max": 0.002759681534371339, "clip_ratio/high_mean": 0.0011857538902404485, "clip_ratio/low_mean": 0.0013369897751545068, "clip_ratio/low_min": 0.00013312562623468693, "clip_ratio/region_mean": 0.0025227436490240507, "epoch": 3.4571428571428573, "grad_norm": 0.12311290204524994, "learning_rate": 1e-06, "loss": -0.0382, "step": 241 }, { "clip_ratio/high_max": 0.0029041526722721756, "clip_ratio/high_mean": 0.0012738214209093712, "clip_ratio/low_mean": 0.0013352936330193188, "clip_ratio/low_min": 0.00010003879469877575, "clip_ratio/region_mean": 0.002609115093946457, "epoch": 3.466472303206997, "grad_norm": 0.1270136684179306, "learning_rate": 1e-06, "loss": 0.0007, "step": 242 }, { "clip_ratio/high_max": 0.002884062720113434, "clip_ratio/high_mean": 0.0012906028241559397, "clip_ratio/low_mean": 0.0011787245639425237, "clip_ratio/low_min": 6.64215040160343e-05, "clip_ratio/region_mean": 0.002469327358994633, "epoch": 3.4758017492711373, "grad_norm": 0.11275164037942886, "learning_rate": 1e-06, "loss": -0.0278, "step": 243 }, { "clip_ratio/high_max": 0.0024617731032776646, "clip_ratio/high_mean": 0.0012163817264081445, "clip_ratio/low_mean": 0.0012092451579519548, "clip_ratio/low_min": 0.00017671631940174848, "clip_ratio/region_mean": 0.002425626909825951, "epoch": 3.485131195335277, "grad_norm": 0.12209966778755188, "learning_rate": 1e-06, "loss": -0.014, "step": 244 }, { "clip_ratio/high_max": 0.0024834484211169183, "clip_ratio/high_mean": 0.0011175874242326245, "clip_ratio/low_mean": 0.0012580749589687912, "clip_ratio/low_min": 0.00012665904250752646, "clip_ratio/region_mean": 0.002375662421400193, "epoch": 3.494460641399417, "grad_norm": 0.13395462930202484, "learning_rate": 1e-06, "loss": 0.0237, "step": 245 }, { "clip_ratio/high_max": 0.003192849879269488, "clip_ratio/high_mean": 0.0013443793941405602, "clip_ratio/low_mean": 0.0013164814190531615, "clip_ratio/low_min": 0.00011386331061657984, "clip_ratio/region_mean": 0.002660860765899997, "epoch": 3.503790087463557, "grad_norm": 0.1278659701347351, "learning_rate": 1e-06, "loss": -0.0339, "step": 246 }, { "clip_ratio/high_max": 0.002776773377263453, "clip_ratio/high_mean": 0.0012837477552238852, "clip_ratio/low_mean": 0.001044681812345516, "clip_ratio/low_min": 1.19434362204629e-05, "clip_ratio/region_mean": 0.002328429589397274, "epoch": 3.513119533527697, "grad_norm": 0.12169931828975677, "learning_rate": 1e-06, "loss": -0.0478, "step": 247 }, { "clip_ratio/high_max": 0.002933812269475311, "clip_ratio/high_mean": 0.00136197380561498, "clip_ratio/low_mean": 0.0010882375408982625, "clip_ratio/low_min": 8.942526983446442e-05, "clip_ratio/region_mean": 0.0024502113883499987, "epoch": 3.522448979591837, "grad_norm": 0.13563691079616547, "learning_rate": 1e-06, "loss": -0.0443, "step": 248 }, { "clip_ratio/high_max": 0.002817699161823839, "clip_ratio/high_mean": 0.0012116857615183108, "clip_ratio/low_mean": 0.0011869966874655802, "clip_ratio/low_min": 0.00013031674552621553, "clip_ratio/region_mean": 0.0023986824016901664, "epoch": 3.5317784256559768, "grad_norm": 0.11663589626550674, "learning_rate": 1e-06, "loss": -0.0086, "step": 249 }, { "clip_ratio/high_max": 0.0026597510805004276, "clip_ratio/high_mean": 0.001287872190005146, "clip_ratio/low_mean": 0.0011019548092008336, "clip_ratio/low_min": 9.463294645684073e-05, "clip_ratio/region_mean": 0.0023898269719211385, "epoch": 3.5411078717201168, "grad_norm": 0.1313488930463791, "learning_rate": 1e-06, "loss": -0.0176, "step": 250 }, { "clip_ratio/high_max": 0.0026479246371309273, "clip_ratio/high_mean": 0.001119159955123905, "clip_ratio/low_mean": 0.0012401145868352614, "clip_ratio/low_min": 0.0001741686228342587, "clip_ratio/region_mean": 0.0023592745055793785, "epoch": 3.5504373177842563, "grad_norm": 0.12081325799226761, "learning_rate": 1e-06, "loss": -0.0012, "step": 251 }, { "clip_ratio/high_max": 0.002332132924493635, "clip_ratio/high_mean": 0.0010688346010283567, "clip_ratio/low_mean": 0.0015507310672546737, "clip_ratio/low_min": 5.5538073866046034e-05, "clip_ratio/region_mean": 0.0026195656246272847, "epoch": 3.5597667638483967, "grad_norm": 0.11730310320854187, "learning_rate": 1e-06, "loss": 0.0101, "step": 252 }, { "clip_ratio/high_max": 0.002911170980951283, "clip_ratio/high_mean": 0.0013587812572950497, "clip_ratio/low_mean": 0.001245073573954869, "clip_ratio/low_min": 8.216908281610813e-05, "clip_ratio/region_mean": 0.002603854867629707, "epoch": 3.5690962099125363, "grad_norm": 0.1244642436504364, "learning_rate": 1e-06, "loss": -0.0078, "step": 253 }, { "clip_ratio/high_max": 0.002901366307924036, "clip_ratio/high_mean": 0.0012334625025687274, "clip_ratio/low_mean": 0.0014136367390165105, "clip_ratio/low_min": 0.00012771209549100604, "clip_ratio/region_mean": 0.0026470992161193863, "epoch": 3.5784256559766763, "grad_norm": 0.12591688334941864, "learning_rate": 1e-06, "loss": 0.0308, "step": 254 }, { "clip_ratio/high_max": 0.0023122161510400474, "clip_ratio/high_mean": 0.001132892728492152, "clip_ratio/low_mean": 0.0013907535576436203, "clip_ratio/low_min": 0.00016566381236771122, "clip_ratio/region_mean": 0.0025236463407054543, "epoch": 3.5877551020408163, "grad_norm": 0.12184864282608032, "learning_rate": 1e-06, "loss": -0.0038, "step": 255 }, { "clip_ratio/high_max": 0.0026201951259281486, "clip_ratio/high_mean": 0.0011652265438897302, "clip_ratio/low_mean": 0.001409195323503809, "clip_ratio/low_min": 0.00011419090697017964, "clip_ratio/region_mean": 0.0025744218146428466, "epoch": 3.5970845481049563, "grad_norm": 0.11056038737297058, "learning_rate": 1e-06, "loss": 0.0087, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0198451450892857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 633.3449096679688, "completions/mean_terminated_length": 563.2366943359375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 4.0093294460641395, "grad_norm": 0.12988856434822083, "learning_rate": 1e-06, "loss": 0.013, "num_tokens": 186267505.0, "reward": 0.5701032876968384, "reward_std": 0.20984646677970886, "rewards/simpleverify_reward/mean": 0.5701032280921936, "rewards/simpleverify_reward/std": 0.4950654208660126, "step": 257 }, { "clip_ratio/high_max": 0.0020908534424961545, "clip_ratio/high_mean": 0.0009248588939954061, "clip_ratio/low_mean": 0.0006118935616541421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015367524756584316, "epoch": 4.01865889212828, "grad_norm": 0.11844545602798462, "learning_rate": 1e-06, "loss": -0.0163, "step": 258 }, { "clip_ratio/high_max": 0.0024169725002138875, "clip_ratio/high_mean": 0.0009596188428986352, "clip_ratio/low_mean": 0.0006339341798593523, "clip_ratio/low_min": 5.774290275439853e-05, "clip_ratio/region_mean": 0.0015935530100250617, "epoch": 4.0279883381924195, "grad_norm": 0.12983858585357666, "learning_rate": 1e-06, "loss": 0.0301, "step": 259 }, { "clip_ratio/high_max": 0.0021098739161971025, "clip_ratio/high_mean": 0.0009157013937510783, "clip_ratio/low_mean": 0.0005333022372724372, "clip_ratio/low_min": 2.7551244784262963e-05, "clip_ratio/region_mean": 0.0014490036192000844, "epoch": 4.03731778425656, "grad_norm": 0.1142122820019722, "learning_rate": 1e-06, "loss": 0.0201, "step": 260 }, { "clip_ratio/high_max": 0.0025062408021767624, "clip_ratio/high_mean": 0.0010443536357342964, "clip_ratio/low_mean": 0.0006596801667910768, "clip_ratio/low_min": 4.3925439968006685e-05, "clip_ratio/region_mean": 0.0017040337916114368, "epoch": 4.0466472303206995, "grad_norm": 0.11819064617156982, "learning_rate": 1e-06, "loss": 0.0198, "step": 261 }, { "clip_ratio/high_max": 0.002299812222190667, "clip_ratio/high_mean": 0.0008828037225612206, "clip_ratio/low_mean": 0.0006577786462003132, "clip_ratio/low_min": 1.6408506780862808e-05, "clip_ratio/region_mean": 0.0015405823396577034, "epoch": 4.05597667638484, "grad_norm": 0.10681001096963882, "learning_rate": 1e-06, "loss": 0.0042, "step": 262 }, { "clip_ratio/high_max": 0.002206483230111189, "clip_ratio/high_mean": 0.0009063853976840619, "clip_ratio/low_mean": 0.0007963443276821636, "clip_ratio/low_min": 4.1082725147134624e-05, "clip_ratio/region_mean": 0.0017027296926244162, "epoch": 4.0653061224489795, "grad_norm": 0.11958717554807663, "learning_rate": 1e-06, "loss": 0.0314, "step": 263 }, { "clip_ratio/high_max": 0.002453448105370626, "clip_ratio/high_mean": 0.000957541968091391, "clip_ratio/low_mean": 0.0006116921285865828, "clip_ratio/low_min": 1.8647438992047682e-05, "clip_ratio/region_mean": 0.0015692340821260586, "epoch": 4.07463556851312, "grad_norm": 0.11270119994878769, "learning_rate": 1e-06, "loss": -0.0197, "step": 264 }, { "clip_ratio/high_max": 0.0022682247654302046, "clip_ratio/high_mean": 0.0010995151060342323, "clip_ratio/low_mean": 0.0008586617605033098, "clip_ratio/low_min": 3.056699097214732e-05, "clip_ratio/region_mean": 0.0019581768137868494, "epoch": 4.0839650145772595, "grad_norm": 0.12431418895721436, "learning_rate": 1e-06, "loss": -0.0429, "step": 265 }, { "clip_ratio/high_max": 0.0021887957336730324, "clip_ratio/high_mean": 0.0008886759751476347, "clip_ratio/low_mean": 0.000879854835147853, "clip_ratio/low_min": 0.00010521688454900868, "clip_ratio/region_mean": 0.0017685308412183076, "epoch": 4.093294460641399, "grad_norm": 0.11466091871261597, "learning_rate": 1e-06, "loss": 0.0031, "step": 266 }, { "clip_ratio/high_max": 0.002351123606786132, "clip_ratio/high_mean": 0.0009354367393825669, "clip_ratio/low_mean": 0.0008512701897416264, "clip_ratio/low_min": 1.6979081919998862e-05, "clip_ratio/region_mean": 0.0017867069254862145, "epoch": 4.1026239067055394, "grad_norm": 0.1345750391483307, "learning_rate": 1e-06, "loss": 0.0144, "step": 267 }, { "clip_ratio/high_max": 0.00229811549797887, "clip_ratio/high_mean": 0.0010024150597018888, "clip_ratio/low_mean": 0.001148469036706956, "clip_ratio/low_min": 7.296910644072341e-05, "clip_ratio/region_mean": 0.002150884138245601, "epoch": 4.111953352769679, "grad_norm": 0.12159934639930725, "learning_rate": 1e-06, "loss": 0.0448, "step": 268 }, { "clip_ratio/high_max": 0.002365124732023105, "clip_ratio/high_mean": 0.0009946237587428186, "clip_ratio/low_mean": 0.000946328169447952, "clip_ratio/low_min": 1.065098877006676e-05, "clip_ratio/region_mean": 0.0019409519154578447, "epoch": 4.121282798833819, "grad_norm": 0.12911644577980042, "learning_rate": 1e-06, "loss": 0.0135, "step": 269 }, { "clip_ratio/high_max": 0.002485420598532073, "clip_ratio/high_mean": 0.0010724675994424615, "clip_ratio/low_mean": 0.000984783640888054, "clip_ratio/low_min": 4.1116952161246445e-05, "clip_ratio/region_mean": 0.0020572512185026426, "epoch": 4.130612244897959, "grad_norm": 0.12297946214675903, "learning_rate": 1e-06, "loss": -0.0002, "step": 270 }, { "clip_ratio/high_max": 0.0020749176692334004, "clip_ratio/high_mean": 0.0009312258298450615, "clip_ratio/low_mean": 0.0009791130141820759, "clip_ratio/low_min": 4.269963847036706e-05, "clip_ratio/region_mean": 0.0019103388767689466, "epoch": 4.139941690962099, "grad_norm": 0.10888082534074783, "learning_rate": 1e-06, "loss": 0.0142, "step": 271 }, { "clip_ratio/high_max": 0.002122955716913566, "clip_ratio/high_mean": 0.0009231488220393658, "clip_ratio/low_mean": 0.0008404498512391001, "clip_ratio/low_min": 1.2605889423866756e-05, "clip_ratio/region_mean": 0.001763598651450593, "epoch": 4.149271137026239, "grad_norm": 0.11918072402477264, "learning_rate": 1e-06, "loss": -0.0408, "step": 272 }, { "clip_ratio/high_max": 0.002558688349381555, "clip_ratio/high_mean": 0.0010125796143256593, "clip_ratio/low_mean": 0.0010489000196685083, "clip_ratio/low_min": 4.142809302720707e-05, "clip_ratio/region_mean": 0.002061479608528316, "epoch": 4.158600583090379, "grad_norm": 0.12786640226840973, "learning_rate": 1e-06, "loss": -0.0022, "step": 273 }, { "clip_ratio/high_max": 0.002730152144067688, "clip_ratio/high_mean": 0.0010852359282580437, "clip_ratio/low_mean": 0.0011555486598808784, "clip_ratio/low_min": 9.449987373955082e-05, "clip_ratio/region_mean": 0.0022407845754059963, "epoch": 4.167930029154519, "grad_norm": 0.12328851222991943, "learning_rate": 1e-06, "loss": -0.0073, "step": 274 }, { "clip_ratio/high_max": 0.002337503588933032, "clip_ratio/high_mean": 0.0011032946422346868, "clip_ratio/low_mean": 0.0010695339278754545, "clip_ratio/low_min": 9.362400578538654e-05, "clip_ratio/region_mean": 0.002172828622860834, "epoch": 4.1772594752186585, "grad_norm": 0.1283477246761322, "learning_rate": 1e-06, "loss": -0.0113, "step": 275 }, { "clip_ratio/high_max": 0.0024091437298920937, "clip_ratio/high_mean": 0.0010472331050550565, "clip_ratio/low_mean": 0.001251158282684628, "clip_ratio/low_min": 9.006630898511503e-05, "clip_ratio/region_mean": 0.002298391453223303, "epoch": 4.186588921282799, "grad_norm": 0.12439129501581192, "learning_rate": 1e-06, "loss": 0.0253, "step": 276 }, { "clip_ratio/high_max": 0.002555749480961822, "clip_ratio/high_mean": 0.001136602833867073, "clip_ratio/low_mean": 0.00109997466825007, "clip_ratio/low_min": 0.00010164565719605889, "clip_ratio/region_mean": 0.0022365774857462384, "epoch": 4.1959183673469385, "grad_norm": 0.1284770518541336, "learning_rate": 1e-06, "loss": -0.0316, "step": 277 }, { "clip_ratio/high_max": 0.002383962339081336, "clip_ratio/high_mean": 0.0010050461278297007, "clip_ratio/low_mean": 0.0012296343520574737, "clip_ratio/low_min": 0.00013423965629044687, "clip_ratio/region_mean": 0.002234680468973238, "epoch": 4.205247813411079, "grad_norm": 0.1269519329071045, "learning_rate": 1e-06, "loss": 0.0315, "step": 278 }, { "clip_ratio/high_max": 0.0025625654016039334, "clip_ratio/high_mean": 0.0010282204348186497, "clip_ratio/low_mean": 0.0011528983050084207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002181118747103028, "epoch": 4.214577259475218, "grad_norm": 0.11142841726541519, "learning_rate": 1e-06, "loss": -0.0009, "step": 279 }, { "clip_ratio/high_max": 0.0028328740372671746, "clip_ratio/high_mean": 0.001188128771900665, "clip_ratio/low_mean": 0.001024641631374834, "clip_ratio/low_min": 0.00012055297520419117, "clip_ratio/region_mean": 0.002212770385085605, "epoch": 4.223906705539359, "grad_norm": 0.12641702592372894, "learning_rate": 1e-06, "loss": -0.0712, "step": 280 }, { "clip_ratio/high_max": 0.002491969891707413, "clip_ratio/high_mean": 0.0011266416040598415, "clip_ratio/low_mean": 0.0011804221103375312, "clip_ratio/low_min": 7.797056605340913e-05, "clip_ratio/region_mean": 0.002307063725311309, "epoch": 4.233236151603498, "grad_norm": 0.12074771523475647, "learning_rate": 1e-06, "loss": -0.0392, "step": 281 }, { "clip_ratio/high_max": 0.00244262828346109, "clip_ratio/high_mean": 0.0009119560554609052, "clip_ratio/low_mean": 0.0011744700714189094, "clip_ratio/low_min": 0.00022259577690419974, "clip_ratio/region_mean": 0.0020864261059614364, "epoch": 4.242565597667639, "grad_norm": 0.11982996016740799, "learning_rate": 1e-06, "loss": 0.0058, "step": 282 }, { "clip_ratio/high_max": 0.001973771104530897, "clip_ratio/high_mean": 0.0009037029594765045, "clip_ratio/low_mean": 0.0014119122315605637, "clip_ratio/low_min": 5.4030817409511656e-05, "clip_ratio/region_mean": 0.002315615158295259, "epoch": 4.251895043731778, "grad_norm": 0.11924701929092407, "learning_rate": 1e-06, "loss": 0.0379, "step": 283 }, { "clip_ratio/high_max": 0.002192493964685127, "clip_ratio/high_mean": 0.0009306118699896615, "clip_ratio/low_mean": 0.0010762799756776076, "clip_ratio/low_min": 0.00012505865015555173, "clip_ratio/region_mean": 0.002006891882047057, "epoch": 4.261224489795918, "grad_norm": 0.11580277979373932, "learning_rate": 1e-06, "loss": 0.026, "step": 284 }, { "clip_ratio/high_max": 0.002154302259441465, "clip_ratio/high_mean": 0.0010459450022608507, "clip_ratio/low_mean": 0.0012652925979637075, "clip_ratio/low_min": 0.000134850743052084, "clip_ratio/region_mean": 0.002311237607500516, "epoch": 4.270553935860058, "grad_norm": 0.13243380188941956, "learning_rate": 1e-06, "loss": 0.0234, "step": 285 }, { "clip_ratio/high_max": 0.0024635402733110823, "clip_ratio/high_mean": 0.000988963203781168, "clip_ratio/low_mean": 0.001163630215160083, "clip_ratio/low_min": 0.0001924317257362418, "clip_ratio/region_mean": 0.00215259339165641, "epoch": 4.279883381924198, "grad_norm": 0.11850841343402863, "learning_rate": 1e-06, "loss": -0.0112, "step": 286 }, { "clip_ratio/high_max": 0.002560369757702574, "clip_ratio/high_mean": 0.0011404558681533672, "clip_ratio/low_mean": 0.0013572694479080383, "clip_ratio/low_min": 0.00013896627842768794, "clip_ratio/region_mean": 0.0024977253124234267, "epoch": 4.289212827988338, "grad_norm": 0.1256835013628006, "learning_rate": 1e-06, "loss": 0.0401, "step": 287 }, { "clip_ratio/high_max": 0.0022343709933920763, "clip_ratio/high_mean": 0.0009108921967708739, "clip_ratio/low_mean": 0.0011752546743082348, "clip_ratio/low_min": 0.0001174202079710085, "clip_ratio/region_mean": 0.0020861468656221405, "epoch": 4.298542274052478, "grad_norm": 0.1245896965265274, "learning_rate": 1e-06, "loss": 0.0404, "step": 288 }, { "clip_ratio/high_max": 0.0026747331357910298, "clip_ratio/high_mean": 0.0010331952926208032, "clip_ratio/low_mean": 0.001311896885454189, "clip_ratio/low_min": 0.00014947346244298387, "clip_ratio/region_mean": 0.002345092194445897, "epoch": 4.307871720116618, "grad_norm": 0.11085493862628937, "learning_rate": 1e-06, "loss": 0.0267, "step": 289 }, { "clip_ratio/high_max": 0.0027068806084571406, "clip_ratio/high_mean": 0.0011260343235335313, "clip_ratio/low_mean": 0.00114196895447094, "clip_ratio/low_min": 0.00014054378698347136, "clip_ratio/region_mean": 0.0022680032925563864, "epoch": 4.317201166180758, "grad_norm": 0.12553079426288605, "learning_rate": 1e-06, "loss": 0.0117, "step": 290 }, { "clip_ratio/high_max": 0.002579201631306205, "clip_ratio/high_mean": 0.0010826847319549415, "clip_ratio/low_mean": 0.001319100076216273, "clip_ratio/low_min": 0.00016733756547182566, "clip_ratio/region_mean": 0.0024017847754294053, "epoch": 4.326530612244898, "grad_norm": 0.12764209508895874, "learning_rate": 1e-06, "loss": 0.0144, "step": 291 }, { "clip_ratio/high_max": 0.00249725476896856, "clip_ratio/high_mean": 0.0011542900429049041, "clip_ratio/low_mean": 0.00111367888712266, "clip_ratio/low_min": 2.815515017573489e-05, "clip_ratio/region_mean": 0.0022679689282085747, "epoch": 4.335860058309038, "grad_norm": 0.1246132105588913, "learning_rate": 1e-06, "loss": -0.0238, "step": 292 }, { "clip_ratio/high_max": 0.00241856378124794, "clip_ratio/high_mean": 0.0010271665178152034, "clip_ratio/low_mean": 0.0012715058073808905, "clip_ratio/low_min": 0.00011820547661045566, "clip_ratio/region_mean": 0.0022986723633948714, "epoch": 4.345189504373177, "grad_norm": 0.125278502702713, "learning_rate": 1e-06, "loss": 0.0223, "step": 293 }, { "clip_ratio/high_max": 0.003100218717008829, "clip_ratio/high_mean": 0.001345846161711961, "clip_ratio/low_mean": 0.0011336277548252838, "clip_ratio/low_min": 0.00016647075244691223, "clip_ratio/region_mean": 0.0024794739365461282, "epoch": 4.354518950437318, "grad_norm": 0.12681154906749725, "learning_rate": 1e-06, "loss": -0.0231, "step": 294 }, { "clip_ratio/high_max": 0.0025621378226787783, "clip_ratio/high_mean": 0.00110545907591586, "clip_ratio/low_mean": 0.0011410650822654134, "clip_ratio/low_min": 0.0001393528000335209, "clip_ratio/region_mean": 0.002246524156362284, "epoch": 4.363848396501457, "grad_norm": 0.11944369971752167, "learning_rate": 1e-06, "loss": -0.0006, "step": 295 }, { "clip_ratio/high_max": 0.0024155409410013817, "clip_ratio/high_mean": 0.0011104911027359776, "clip_ratio/low_mean": 0.0011493445454107132, "clip_ratio/low_min": 9.195715210807975e-05, "clip_ratio/region_mean": 0.0022598356954404153, "epoch": 4.373177842565598, "grad_norm": 0.12992224097251892, "learning_rate": 1e-06, "loss": -0.0027, "step": 296 }, { "clip_ratio/high_max": 0.002720894022786524, "clip_ratio/high_mean": 0.0012110996212868486, "clip_ratio/low_mean": 0.0009991417209676001, "clip_ratio/low_min": 5.8841151258093305e-05, "clip_ratio/region_mean": 0.0022102413131506182, "epoch": 4.382507288629737, "grad_norm": 0.1187589094042778, "learning_rate": 1e-06, "loss": -0.0411, "step": 297 }, { "clip_ratio/high_max": 0.002440937743813265, "clip_ratio/high_mean": 0.0012230294705659617, "clip_ratio/low_mean": 0.0012636580413527554, "clip_ratio/low_min": 0.0001834376325859921, "clip_ratio/region_mean": 0.0024866875173756853, "epoch": 4.391836734693878, "grad_norm": 0.11927883327007294, "learning_rate": 1e-06, "loss": -0.0068, "step": 298 }, { "clip_ratio/high_max": 0.00268653185776202, "clip_ratio/high_mean": 0.0011927109226235189, "clip_ratio/low_mean": 0.001165936449979199, "clip_ratio/low_min": 0.00020812458114960464, "clip_ratio/region_mean": 0.002358647332584951, "epoch": 4.401166180758017, "grad_norm": 0.13587383925914764, "learning_rate": 1e-06, "loss": -0.0018, "step": 299 }, { "clip_ratio/high_max": 0.002681189842405729, "clip_ratio/high_mean": 0.0012017971057503019, "clip_ratio/low_mean": 0.0009524114539090078, "clip_ratio/low_min": 2.6411143153382e-05, "clip_ratio/region_mean": 0.0021542085451073945, "epoch": 4.410495626822158, "grad_norm": 0.11905869841575623, "learning_rate": 1e-06, "loss": -0.0027, "step": 300 }, { "clip_ratio/high_max": 0.0030099656178208534, "clip_ratio/high_mean": 0.0012513937235780759, "clip_ratio/low_mean": 0.0011389283727112343, "clip_ratio/low_min": 5.8096340580959804e-05, "clip_ratio/region_mean": 0.0023903220062493347, "epoch": 4.419825072886297, "grad_norm": 0.1261315494775772, "learning_rate": 1e-06, "loss": -0.0325, "step": 301 }, { "clip_ratio/high_max": 0.00295354258196312, "clip_ratio/high_mean": 0.0011922542544198222, "clip_ratio/low_mean": 0.0011407496240281034, "clip_ratio/low_min": 0.00012139445607317612, "clip_ratio/region_mean": 0.002333003889361862, "epoch": 4.429154518950437, "grad_norm": 0.1312561184167862, "learning_rate": 1e-06, "loss": 0.0067, "step": 302 }, { "clip_ratio/high_max": 0.0026947886726702563, "clip_ratio/high_mean": 0.001134440271925996, "clip_ratio/low_mean": 0.0011474858451947512, "clip_ratio/low_min": 3.3685639209579676e-05, "clip_ratio/region_mean": 0.0022819261357653886, "epoch": 4.438483965014577, "grad_norm": 0.13060961663722992, "learning_rate": 1e-06, "loss": 0.0052, "step": 303 }, { "clip_ratio/high_max": 0.0028368889543344267, "clip_ratio/high_mean": 0.0011986778699792922, "clip_ratio/low_mean": 0.0011177725564266439, "clip_ratio/low_min": 0.00013663903700944502, "clip_ratio/region_mean": 0.00231645046005724, "epoch": 4.447813411078717, "grad_norm": 0.12184076756238937, "learning_rate": 1e-06, "loss": 0.0294, "step": 304 }, { "clip_ratio/high_max": 0.0026461620291229337, "clip_ratio/high_mean": 0.0012459568715712521, "clip_ratio/low_mean": 0.001223670846229652, "clip_ratio/low_min": 0.00022749119671061635, "clip_ratio/region_mean": 0.0024696276741451584, "epoch": 4.457142857142857, "grad_norm": 0.1280045509338379, "learning_rate": 1e-06, "loss": 0.0149, "step": 305 }, { "clip_ratio/high_max": 0.0028626590574276634, "clip_ratio/high_mean": 0.0011490106480778195, "clip_ratio/low_mean": 0.0010497920302441344, "clip_ratio/low_min": 5.4598634960711934e-05, "clip_ratio/region_mean": 0.0021988027074257843, "epoch": 4.466472303206997, "grad_norm": 0.12246491760015488, "learning_rate": 1e-06, "loss": 0.0215, "step": 306 }, { "clip_ratio/high_max": 0.0027839219983434305, "clip_ratio/high_mean": 0.0011757337906601606, "clip_ratio/low_mean": 0.0010917840081674512, "clip_ratio/low_min": 7.153618389565963e-05, "clip_ratio/region_mean": 0.002267517826112453, "epoch": 4.475801749271137, "grad_norm": 0.11523416638374329, "learning_rate": 1e-06, "loss": -0.0143, "step": 307 }, { "clip_ratio/high_max": 0.0026096665096702054, "clip_ratio/high_mean": 0.0012803206191165373, "clip_ratio/low_mean": 0.001147410617704736, "clip_ratio/low_min": 4.01049528591102e-05, "clip_ratio/region_mean": 0.0024277312550111674, "epoch": 4.485131195335277, "grad_norm": 0.12913104891777039, "learning_rate": 1e-06, "loss": -0.0084, "step": 308 }, { "clip_ratio/high_max": 0.002587119917734526, "clip_ratio/high_mean": 0.0010348725227231625, "clip_ratio/low_mean": 0.0012732024624710903, "clip_ratio/low_min": 9.98653322312748e-05, "clip_ratio/region_mean": 0.0023080749670043588, "epoch": 4.494460641399417, "grad_norm": 0.13543091714382172, "learning_rate": 1e-06, "loss": 0.0552, "step": 309 }, { "clip_ratio/high_max": 0.002501116046914831, "clip_ratio/high_mean": 0.0011944292346015573, "clip_ratio/low_mean": 0.0012013216619379818, "clip_ratio/low_min": 0.00016099921867862577, "clip_ratio/region_mean": 0.002395750881987624, "epoch": 4.503790087463557, "grad_norm": 0.10411816090345383, "learning_rate": 1e-06, "loss": 0.0056, "step": 310 }, { "clip_ratio/high_max": 0.0026851797665585764, "clip_ratio/high_mean": 0.0010980216757161543, "clip_ratio/low_mean": 0.0015183269351837225, "clip_ratio/low_min": 0.0001243856950168265, "clip_ratio/region_mean": 0.002616348610899877, "epoch": 4.513119533527696, "grad_norm": 0.12977652251720428, "learning_rate": 1e-06, "loss": 0.0259, "step": 311 }, { "clip_ratio/high_max": 0.0024513979733455926, "clip_ratio/high_mean": 0.0010857978741114493, "clip_ratio/low_mean": 0.0014130021700111683, "clip_ratio/low_min": 0.0003086895030719461, "clip_ratio/region_mean": 0.0024987999931909144, "epoch": 4.522448979591837, "grad_norm": 0.11546915024518967, "learning_rate": 1e-06, "loss": 0.0296, "step": 312 }, { "clip_ratio/high_max": 0.002582550539955264, "clip_ratio/high_mean": 0.0011205352147953818, "clip_ratio/low_mean": 0.001067399003659375, "clip_ratio/low_min": 0.00013611125268653268, "clip_ratio/region_mean": 0.0021879342166357674, "epoch": 4.531778425655976, "grad_norm": 0.1222282350063324, "learning_rate": 1e-06, "loss": -0.0312, "step": 313 }, { "clip_ratio/high_max": 0.002486808065441437, "clip_ratio/high_mean": 0.001108541680878261, "clip_ratio/low_mean": 0.0013029424262640532, "clip_ratio/low_min": 0.00014830433246970642, "clip_ratio/region_mean": 0.0024114841216942295, "epoch": 4.541107871720117, "grad_norm": 0.1204584464430809, "learning_rate": 1e-06, "loss": -0.0196, "step": 314 }, { "clip_ratio/high_max": 0.002567245996033307, "clip_ratio/high_mean": 0.0011452670205471804, "clip_ratio/low_mean": 0.0012851643477915786, "clip_ratio/low_min": 0.00021749450661445735, "clip_ratio/region_mean": 0.0024304313483298756, "epoch": 4.550437317784256, "grad_norm": 0.12581069767475128, "learning_rate": 1e-06, "loss": -0.0084, "step": 315 }, { "clip_ratio/high_max": 0.0029545294819399714, "clip_ratio/high_mean": 0.0012033091225021053, "clip_ratio/low_mean": 0.0010612360765662743, "clip_ratio/low_min": 7.345522863033693e-05, "clip_ratio/region_mean": 0.00226454519724939, "epoch": 4.559766763848397, "grad_norm": 0.12477901577949524, "learning_rate": 1e-06, "loss": -0.0127, "step": 316 }, { "clip_ratio/high_max": 0.0026394765154691413, "clip_ratio/high_mean": 0.0012130733375670388, "clip_ratio/low_mean": 0.0010973194766847882, "clip_ratio/low_min": 8.088763570412993e-05, "clip_ratio/region_mean": 0.002310392759682145, "epoch": 4.569096209912536, "grad_norm": 0.11980269104242325, "learning_rate": 1e-06, "loss": -0.0294, "step": 317 }, { "clip_ratio/high_max": 0.0027670708805089816, "clip_ratio/high_mean": 0.0011913067246496212, "clip_ratio/low_mean": 0.0012311444261285942, "clip_ratio/low_min": 0.00015340284244302893, "clip_ratio/region_mean": 0.0024224511289503425, "epoch": 4.578425655976677, "grad_norm": 0.12332651019096375, "learning_rate": 1e-06, "loss": 0.0144, "step": 318 }, { "clip_ratio/high_max": 0.0026189246054855175, "clip_ratio/high_mean": 0.0011845795452245511, "clip_ratio/low_mean": 0.0012857424590038136, "clip_ratio/low_min": 4.9929793931369204e-05, "clip_ratio/region_mean": 0.0024703219169168733, "epoch": 4.587755102040816, "grad_norm": 0.11435230821371078, "learning_rate": 1e-06, "loss": -0.007, "step": 319 }, { "clip_ratio/high_max": 0.0026665675541153178, "clip_ratio/high_mean": 0.001226918548127287, "clip_ratio/low_mean": 0.0011941068987653125, "clip_ratio/low_min": 0.00016394666363339638, "clip_ratio/region_mean": 0.0024210253832279705, "epoch": 4.597084548104956, "grad_norm": 0.12260650098323822, "learning_rate": 1e-06, "loss": -0.0185, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.023297991071428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 650.0001831054688, "completions/mean_terminated_length": 567.8002319335938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 5.0093294460641395, "grad_norm": 0.1160479187965393, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 223770075.0, "reward": 0.5726667642593384, "reward_std": 0.20052407681941986, "rewards/simpleverify_reward/mean": 0.5726667046546936, "rewards/simpleverify_reward/std": 0.4946956932544708, "step": 321 }, { "clip_ratio/high_max": 0.002331171781406738, "clip_ratio/high_mean": 0.0008940868410718394, "clip_ratio/low_mean": 0.0006400234624379664, "clip_ratio/low_min": 6.731693156325491e-05, "clip_ratio/region_mean": 0.00153411030623829, "epoch": 5.01865889212828, "grad_norm": 0.12034779787063599, "learning_rate": 1e-06, "loss": 0.0079, "step": 322 }, { "clip_ratio/high_max": 0.002272610225190874, "clip_ratio/high_mean": 0.0008834997934172861, "clip_ratio/low_mean": 0.000596067064179806, "clip_ratio/low_min": 7.751191424176795e-05, "clip_ratio/region_mean": 0.001479566897614859, "epoch": 5.0279883381924195, "grad_norm": 0.12023911625146866, "learning_rate": 1e-06, "loss": 0.0223, "step": 323 }, { "clip_ratio/high_max": 0.0020319217583164573, "clip_ratio/high_mean": 0.0008217888735089218, "clip_ratio/low_mean": 0.00048340864668716677, "clip_ratio/low_min": 8.909480129659642e-06, "clip_ratio/region_mean": 0.0013051975111011416, "epoch": 5.03731778425656, "grad_norm": 0.11286524683237076, "learning_rate": 1e-06, "loss": -0.0048, "step": 324 }, { "clip_ratio/high_max": 0.0018754447446553968, "clip_ratio/high_mean": 0.0008405982625845354, "clip_ratio/low_mean": 0.0006338683879221207, "clip_ratio/low_min": 7.990335507201962e-05, "clip_ratio/region_mean": 0.0014744666223123204, "epoch": 5.0466472303206995, "grad_norm": 0.11907852441072464, "learning_rate": 1e-06, "loss": 0.0325, "step": 325 }, { "clip_ratio/high_max": 0.0025481356206000783, "clip_ratio/high_mean": 0.0010352416102250572, "clip_ratio/low_mean": 0.0006705237265123287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017057653240044601, "epoch": 5.05597667638484, "grad_norm": 0.12502989172935486, "learning_rate": 1e-06, "loss": -0.0346, "step": 326 }, { "clip_ratio/high_max": 0.0019633485935628414, "clip_ratio/high_mean": 0.0008725416064407909, "clip_ratio/low_mean": 0.0007412556515191682, "clip_ratio/low_min": 0.0001087099444703199, "clip_ratio/region_mean": 0.0016137972525029909, "epoch": 5.0653061224489795, "grad_norm": 0.1014496237039566, "learning_rate": 1e-06, "loss": 0.0234, "step": 327 }, { "clip_ratio/high_max": 0.0021971239148115274, "clip_ratio/high_mean": 0.0009192342222377192, "clip_ratio/low_mean": 0.000827921638119733, "clip_ratio/low_min": 8.556289049010957e-05, "clip_ratio/region_mean": 0.0017471558603574522, "epoch": 5.07463556851312, "grad_norm": 0.12265725433826447, "learning_rate": 1e-06, "loss": 0.058, "step": 328 }, { "clip_ratio/high_max": 0.002226817923656199, "clip_ratio/high_mean": 0.000960827879680437, "clip_ratio/low_mean": 0.0009769243661139626, "clip_ratio/low_min": 8.831449940771563e-05, "clip_ratio/region_mean": 0.001937752211233601, "epoch": 5.0839650145772595, "grad_norm": 0.11613408476114273, "learning_rate": 1e-06, "loss": 0.0296, "step": 329 }, { "clip_ratio/high_max": 0.0021740121810580604, "clip_ratio/high_mean": 0.0010172566544497386, "clip_ratio/low_mean": 0.001015359772281954, "clip_ratio/low_min": 7.959023332659854e-05, "clip_ratio/region_mean": 0.0020326164449215867, "epoch": 5.093294460641399, "grad_norm": 0.12618719041347504, "learning_rate": 1e-06, "loss": 0.006, "step": 330 }, { "clip_ratio/high_max": 0.002297082610311918, "clip_ratio/high_mean": 0.0009815042503760196, "clip_ratio/low_mean": 0.0007617335259055835, "clip_ratio/low_min": 3.755227771762293e-05, "clip_ratio/region_mean": 0.001743237764458172, "epoch": 5.1026239067055394, "grad_norm": 0.11878614127635956, "learning_rate": 1e-06, "loss": -0.0225, "step": 331 }, { "clip_ratio/high_max": 0.002167860948247835, "clip_ratio/high_mean": 0.0009389580663992092, "clip_ratio/low_mean": 0.0008977128345577512, "clip_ratio/low_min": 5.93311578995781e-05, "clip_ratio/region_mean": 0.0018366708682151511, "epoch": 5.111953352769679, "grad_norm": 0.12522326409816742, "learning_rate": 1e-06, "loss": 0.0203, "step": 332 }, { "clip_ratio/high_max": 0.0024267619373858906, "clip_ratio/high_mean": 0.0010387013389845379, "clip_ratio/low_mean": 0.0009657479022280313, "clip_ratio/low_min": 6.526062134071253e-05, "clip_ratio/region_mean": 0.0020044491757289506, "epoch": 5.121282798833819, "grad_norm": 0.13469408452510834, "learning_rate": 1e-06, "loss": 0.0223, "step": 333 }, { "clip_ratio/high_max": 0.002821952301019337, "clip_ratio/high_mean": 0.001127877689214074, "clip_ratio/low_mean": 0.0008927829876483884, "clip_ratio/low_min": 6.936312092875596e-05, "clip_ratio/region_mean": 0.002020660656853579, "epoch": 5.130612244897959, "grad_norm": 0.11073664575815201, "learning_rate": 1e-06, "loss": -0.0158, "step": 334 }, { "clip_ratio/high_max": 0.0023295316786970943, "clip_ratio/high_mean": 0.000995270282146521, "clip_ratio/low_mean": 0.0009571791997586843, "clip_ratio/low_min": 6.559574740094831e-05, "clip_ratio/region_mean": 0.0019524495146470144, "epoch": 5.139941690962099, "grad_norm": 0.12611344456672668, "learning_rate": 1e-06, "loss": 0.0282, "step": 335 }, { "clip_ratio/high_max": 0.002461226442392217, "clip_ratio/high_mean": 0.0010669805851648562, "clip_ratio/low_mean": 0.0008438949716946809, "clip_ratio/low_min": 0.00011409677699703025, "clip_ratio/region_mean": 0.0019108755222987384, "epoch": 5.149271137026239, "grad_norm": 0.12872008979320526, "learning_rate": 1e-06, "loss": 0.0103, "step": 336 }, { "clip_ratio/high_max": 0.0028701744886348024, "clip_ratio/high_mean": 0.0011309242145216558, "clip_ratio/low_mean": 0.0009735184194141766, "clip_ratio/low_min": 6.777114595024614e-05, "clip_ratio/region_mean": 0.00210444267213461, "epoch": 5.158600583090379, "grad_norm": 0.12863174080848694, "learning_rate": 1e-06, "loss": -0.0052, "step": 337 }, { "clip_ratio/high_max": 0.0024400284673902206, "clip_ratio/high_mean": 0.0011732769817172084, "clip_ratio/low_mean": 0.0008540326525690034, "clip_ratio/low_min": 9.653192682890221e-05, "clip_ratio/region_mean": 0.002027309630648233, "epoch": 5.167930029154519, "grad_norm": 0.16117098927497864, "learning_rate": 1e-06, "loss": 0.0118, "step": 338 }, { "clip_ratio/high_max": 0.0023087248409865424, "clip_ratio/high_mean": 0.0010590554120426532, "clip_ratio/low_mean": 0.0008177639774658019, "clip_ratio/low_min": 8.208412145904731e-05, "clip_ratio/region_mean": 0.0018768193549476564, "epoch": 5.1772594752186585, "grad_norm": 0.12683363258838654, "learning_rate": 1e-06, "loss": -0.0036, "step": 339 }, { "clip_ratio/high_max": 0.0023404003150062636, "clip_ratio/high_mean": 0.0010037991087301634, "clip_ratio/low_mean": 0.0009337233095720876, "clip_ratio/low_min": 3.5834959817293566e-05, "clip_ratio/region_mean": 0.0019375223855604418, "epoch": 5.186588921282799, "grad_norm": 0.11845511943101883, "learning_rate": 1e-06, "loss": -0.0148, "step": 340 }, { "clip_ratio/high_max": 0.0024044037927524187, "clip_ratio/high_mean": 0.0010416822224215139, "clip_ratio/low_mean": 0.0008880656714609358, "clip_ratio/low_min": 1.4420857951336075e-05, "clip_ratio/region_mean": 0.0019297479229862802, "epoch": 5.1959183673469385, "grad_norm": 0.11200569570064545, "learning_rate": 1e-06, "loss": -0.0225, "step": 341 }, { "clip_ratio/high_max": 0.002634903801663313, "clip_ratio/high_mean": 0.0011278366218903102, "clip_ratio/low_mean": 0.0009324102247774135, "clip_ratio/low_min": 3.589794141589664e-05, "clip_ratio/region_mean": 0.0020602468866854906, "epoch": 5.205247813411079, "grad_norm": 0.12052994966506958, "learning_rate": 1e-06, "loss": -0.0287, "step": 342 }, { "clip_ratio/high_max": 0.0022138115018606186, "clip_ratio/high_mean": 0.0009726383177621756, "clip_ratio/low_mean": 0.0011030281657440355, "clip_ratio/low_min": 5.18448341608746e-05, "clip_ratio/region_mean": 0.0020756665180670097, "epoch": 5.214577259475218, "grad_norm": 0.12434986978769302, "learning_rate": 1e-06, "loss": 0.0144, "step": 343 }, { "clip_ratio/high_max": 0.0021740799566032365, "clip_ratio/high_mean": 0.0010007506134570576, "clip_ratio/low_mean": 0.0012497354691731744, "clip_ratio/low_min": 0.00015783339495101245, "clip_ratio/region_mean": 0.0022504861117340624, "epoch": 5.223906705539359, "grad_norm": 0.11278295516967773, "learning_rate": 1e-06, "loss": 0.0057, "step": 344 }, { "clip_ratio/high_max": 0.0024882980651455, "clip_ratio/high_mean": 0.0011950697225984186, "clip_ratio/low_mean": 0.0010738543496700004, "clip_ratio/low_min": 3.2618742807244416e-05, "clip_ratio/region_mean": 0.0022689240795443766, "epoch": 5.233236151603498, "grad_norm": 0.12787608802318573, "learning_rate": 1e-06, "loss": -0.0455, "step": 345 }, { "clip_ratio/high_max": 0.0026640323412721045, "clip_ratio/high_mean": 0.0011983844451606274, "clip_ratio/low_mean": 0.0011817977501777932, "clip_ratio/low_min": 0.0001532142487121746, "clip_ratio/region_mean": 0.0023801822171662934, "epoch": 5.242565597667639, "grad_norm": 0.12050221115350723, "learning_rate": 1e-06, "loss": 0.0163, "step": 346 }, { "clip_ratio/high_max": 0.002212466177297756, "clip_ratio/high_mean": 0.0010118559766851831, "clip_ratio/low_mean": 0.001352729981590528, "clip_ratio/low_min": 0.00010361557269789046, "clip_ratio/region_mean": 0.0023645859546377324, "epoch": 5.251895043731778, "grad_norm": 0.11172395944595337, "learning_rate": 1e-06, "loss": 0.0195, "step": 347 }, { "clip_ratio/high_max": 0.0026730827521532774, "clip_ratio/high_mean": 0.0011719388348865323, "clip_ratio/low_mean": 0.0010778136183944298, "clip_ratio/low_min": 8.645639081805712e-05, "clip_ratio/region_mean": 0.0022497524769278243, "epoch": 5.261224489795918, "grad_norm": 0.12630048394203186, "learning_rate": 1e-06, "loss": -0.0338, "step": 348 }, { "clip_ratio/high_max": 0.0025160069053526968, "clip_ratio/high_mean": 0.0011735569787560962, "clip_ratio/low_mean": 0.0011306305132166017, "clip_ratio/low_min": 6.0883179685333744e-05, "clip_ratio/region_mean": 0.0023041874592308886, "epoch": 5.270553935860058, "grad_norm": 0.1273338794708252, "learning_rate": 1e-06, "loss": -0.022, "step": 349 }, { "clip_ratio/high_max": 0.0023636617697775364, "clip_ratio/high_mean": 0.0010016886990342755, "clip_ratio/low_mean": 0.0012023055060126353, "clip_ratio/low_min": 0.0001444820827600779, "clip_ratio/region_mean": 0.002203994197770953, "epoch": 5.279883381924198, "grad_norm": 0.12244521081447601, "learning_rate": 1e-06, "loss": 0.0098, "step": 350 }, { "clip_ratio/high_max": 0.002760966177447699, "clip_ratio/high_mean": 0.0010318204731447622, "clip_ratio/low_mean": 0.001118751186368172, "clip_ratio/low_min": 4.047739548695972e-05, "clip_ratio/region_mean": 0.002150571694073733, "epoch": 5.289212827988338, "grad_norm": 0.11932908743619919, "learning_rate": 1e-06, "loss": 0.006, "step": 351 }, { "clip_ratio/high_max": 0.002449590181640815, "clip_ratio/high_mean": 0.0009278356610593619, "clip_ratio/low_mean": 0.001007508508337196, "clip_ratio/low_min": 0.00011873193670908222, "clip_ratio/region_mean": 0.0019353441166458651, "epoch": 5.298542274052478, "grad_norm": 0.12832795083522797, "learning_rate": 1e-06, "loss": -0.0164, "step": 352 }, { "clip_ratio/high_max": 0.002966827465570532, "clip_ratio/high_mean": 0.001323889613559004, "clip_ratio/low_mean": 0.0009575075691827806, "clip_ratio/low_min": 7.740714681858663e-05, "clip_ratio/region_mean": 0.0022813971736468375, "epoch": 5.307871720116618, "grad_norm": 0.1216970905661583, "learning_rate": 1e-06, "loss": -0.0673, "step": 353 }, { "clip_ratio/high_max": 0.0026493128534639254, "clip_ratio/high_mean": 0.0011015911186404992, "clip_ratio/low_mean": 0.0011613713577389717, "clip_ratio/low_min": 0.00019017464182979893, "clip_ratio/region_mean": 0.0022629625163972378, "epoch": 5.317201166180758, "grad_norm": 0.12599064409732819, "learning_rate": 1e-06, "loss": 0.019, "step": 354 }, { "clip_ratio/high_max": 0.0023974219511728734, "clip_ratio/high_mean": 0.0010399304883321747, "clip_ratio/low_mean": 0.0012875821412308142, "clip_ratio/low_min": 0.00022142051602713764, "clip_ratio/region_mean": 0.0023275126222870313, "epoch": 5.326530612244898, "grad_norm": 0.12695588171482086, "learning_rate": 1e-06, "loss": -0.0154, "step": 355 }, { "clip_ratio/high_max": 0.0025081900967052206, "clip_ratio/high_mean": 0.0010517009268369293, "clip_ratio/low_mean": 0.00108551873563556, "clip_ratio/low_min": 3.621984251367394e-05, "clip_ratio/region_mean": 0.0021372197006712668, "epoch": 5.335860058309038, "grad_norm": 0.11755160242319107, "learning_rate": 1e-06, "loss": -0.003, "step": 356 }, { "clip_ratio/high_max": 0.0024951930026873015, "clip_ratio/high_mean": 0.0010354282185289776, "clip_ratio/low_mean": 0.0011063976344303228, "clip_ratio/low_min": 8.091726158454549e-05, "clip_ratio/region_mean": 0.0021418258838821203, "epoch": 5.345189504373177, "grad_norm": 0.1337786763906479, "learning_rate": 1e-06, "loss": 0.0198, "step": 357 }, { "clip_ratio/high_max": 0.0024985221825772896, "clip_ratio/high_mean": 0.0010636583574523684, "clip_ratio/low_mean": 0.0010912470243056305, "clip_ratio/low_min": 1.3997760106576607e-05, "clip_ratio/region_mean": 0.0021549054436036386, "epoch": 5.354518950437318, "grad_norm": 0.1356169581413269, "learning_rate": 1e-06, "loss": -0.0059, "step": 358 }, { "clip_ratio/high_max": 0.0029880207948735915, "clip_ratio/high_mean": 0.0011652917928586248, "clip_ratio/low_mean": 0.0011996547182206996, "clip_ratio/low_min": 8.742724821786396e-05, "clip_ratio/region_mean": 0.002364946558373049, "epoch": 5.363848396501457, "grad_norm": 0.11533825099468231, "learning_rate": 1e-06, "loss": 0.0052, "step": 359 }, { "clip_ratio/high_max": 0.0027105355038656853, "clip_ratio/high_mean": 0.0011398502610973082, "clip_ratio/low_mean": 0.0011601633377722465, "clip_ratio/low_min": 9.1689289547503e-05, "clip_ratio/region_mean": 0.00230001361342147, "epoch": 5.373177842565598, "grad_norm": 0.11489439755678177, "learning_rate": 1e-06, "loss": 0.0121, "step": 360 }, { "clip_ratio/high_max": 0.002577749670308549, "clip_ratio/high_mean": 0.0011227032646274893, "clip_ratio/low_mean": 0.0010684164280974073, "clip_ratio/low_min": 5.337702987162629e-05, "clip_ratio/region_mean": 0.002191119630879257, "epoch": 5.382507288629737, "grad_norm": 0.11034545302391052, "learning_rate": 1e-06, "loss": 0.0443, "step": 361 }, { "clip_ratio/high_max": 0.002486003162630368, "clip_ratio/high_mean": 0.0010863362404052168, "clip_ratio/low_mean": 0.0011133703737868927, "clip_ratio/low_min": 6.0286936786724254e-05, "clip_ratio/region_mean": 0.002199706606916152, "epoch": 5.391836734693878, "grad_norm": 0.11492704600095749, "learning_rate": 1e-06, "loss": -0.0003, "step": 362 }, { "clip_ratio/high_max": 0.002338725797017105, "clip_ratio/high_mean": 0.0009308470598625718, "clip_ratio/low_mean": 0.0010402001280453987, "clip_ratio/low_min": 0.00010227440270682564, "clip_ratio/region_mean": 0.001971047226106748, "epoch": 5.401166180758017, "grad_norm": 0.11579523235559464, "learning_rate": 1e-06, "loss": 0.0383, "step": 363 }, { "clip_ratio/high_max": 0.002818047876644414, "clip_ratio/high_mean": 0.0011057969968533143, "clip_ratio/low_mean": 0.0010729788518801797, "clip_ratio/low_min": 6.820256567152683e-05, "clip_ratio/region_mean": 0.0021787758596474305, "epoch": 5.410495626822158, "grad_norm": 0.11623141914606094, "learning_rate": 1e-06, "loss": 0.0049, "step": 364 }, { "clip_ratio/high_max": 0.002724984889937332, "clip_ratio/high_mean": 0.00099569046869874, "clip_ratio/low_mean": 0.0013767164055025205, "clip_ratio/low_min": 0.00013524930636776844, "clip_ratio/region_mean": 0.002372406866925303, "epoch": 5.419825072886297, "grad_norm": 0.120187908411026, "learning_rate": 1e-06, "loss": 0.0306, "step": 365 }, { "clip_ratio/high_max": 0.0020549648943415377, "clip_ratio/high_mean": 0.0009662838820077013, "clip_ratio/low_mean": 0.0010244866098219063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019907705354853533, "epoch": 5.429154518950437, "grad_norm": 0.1159253865480423, "learning_rate": 1e-06, "loss": 0.0056, "step": 366 }, { "clip_ratio/high_max": 0.002610755203932058, "clip_ratio/high_mean": 0.0012442871011444367, "clip_ratio/low_mean": 0.0011028297485609073, "clip_ratio/low_min": 0.0001869299112513545, "clip_ratio/region_mean": 0.0023471168824471533, "epoch": 5.438483965014577, "grad_norm": 0.13068360090255737, "learning_rate": 1e-06, "loss": -0.0493, "step": 367 }, { "clip_ratio/high_max": 0.0027408521928009577, "clip_ratio/high_mean": 0.001190292568935547, "clip_ratio/low_mean": 0.001175787670945283, "clip_ratio/low_min": 7.003932842053473e-05, "clip_ratio/region_mean": 0.0023660802689846605, "epoch": 5.447813411078717, "grad_norm": 0.12470521032810211, "learning_rate": 1e-06, "loss": 0.0069, "step": 368 }, { "clip_ratio/high_max": 0.0026157360480283387, "clip_ratio/high_mean": 0.0010111351512023248, "clip_ratio/low_mean": 0.0011441534406912979, "clip_ratio/low_min": 4.739906762551982e-05, "clip_ratio/region_mean": 0.002155288602807559, "epoch": 5.457142857142857, "grad_norm": 0.11756538599729538, "learning_rate": 1e-06, "loss": -0.0253, "step": 369 }, { "clip_ratio/high_max": 0.0024719072753214277, "clip_ratio/high_mean": 0.0010762463098217268, "clip_ratio/low_mean": 0.001172886106360238, "clip_ratio/low_min": 6.325067806756124e-05, "clip_ratio/region_mean": 0.002249132412543986, "epoch": 5.466472303206997, "grad_norm": 0.11395695060491562, "learning_rate": 1e-06, "loss": -0.0032, "step": 370 }, { "clip_ratio/high_max": 0.002748812308709603, "clip_ratio/high_mean": 0.0011688599443004932, "clip_ratio/low_mean": 0.0013253480719868094, "clip_ratio/low_min": 0.0001301289785260451, "clip_ratio/region_mean": 0.0024942079471657053, "epoch": 5.475801749271137, "grad_norm": 0.12123240530490875, "learning_rate": 1e-06, "loss": 0.0427, "step": 371 }, { "clip_ratio/high_max": 0.001981674569833558, "clip_ratio/high_mean": 0.000952392572798999, "clip_ratio/low_mean": 0.001176582720290753, "clip_ratio/low_min": 0.00018622579682414653, "clip_ratio/region_mean": 0.0021289753130986355, "epoch": 5.485131195335277, "grad_norm": 0.12081332504749298, "learning_rate": 1e-06, "loss": 0.0367, "step": 372 }, { "clip_ratio/high_max": 0.0022334548193612136, "clip_ratio/high_mean": 0.0010129321344720665, "clip_ratio/low_mean": 0.001158166785899084, "clip_ratio/low_min": 0.00011376764450687915, "clip_ratio/region_mean": 0.002171098931285087, "epoch": 5.494460641399417, "grad_norm": 0.12550733983516693, "learning_rate": 1e-06, "loss": 0.0019, "step": 373 }, { "clip_ratio/high_max": 0.0023999250479391776, "clip_ratio/high_mean": 0.001019960787743912, "clip_ratio/low_mean": 0.001106353285649675, "clip_ratio/low_min": 7.851872396713588e-05, "clip_ratio/region_mean": 0.002126314131601248, "epoch": 5.503790087463557, "grad_norm": 0.11755459755659103, "learning_rate": 1e-06, "loss": 0.0438, "step": 374 }, { "clip_ratio/high_max": 0.0026940009629470296, "clip_ratio/high_mean": 0.001118268981372239, "clip_ratio/low_mean": 0.0011399310315027833, "clip_ratio/low_min": 8.784370584180579e-05, "clip_ratio/region_mean": 0.0022581999946851283, "epoch": 5.513119533527696, "grad_norm": 0.11725718528032303, "learning_rate": 1e-06, "loss": -0.022, "step": 375 }, { "clip_ratio/high_max": 0.0025228596277884208, "clip_ratio/high_mean": 0.0011313870236335788, "clip_ratio/low_mean": 0.0011020937345165294, "clip_ratio/low_min": 0.00010748695331130875, "clip_ratio/region_mean": 0.00223348072177032, "epoch": 5.522448979591837, "grad_norm": 0.11317726224660873, "learning_rate": 1e-06, "loss": 0.0117, "step": 376 }, { "clip_ratio/high_max": 0.0027919566273340024, "clip_ratio/high_mean": 0.0010849928148672916, "clip_ratio/low_mean": 0.0012107754591852427, "clip_ratio/low_min": 8.374091157747898e-05, "clip_ratio/region_mean": 0.0022957682886044495, "epoch": 5.531778425655976, "grad_norm": 0.12786352634429932, "learning_rate": 1e-06, "loss": -0.0225, "step": 377 }, { "clip_ratio/high_max": 0.0025416793214390054, "clip_ratio/high_mean": 0.0011544046974449884, "clip_ratio/low_mean": 0.0010452705901116133, "clip_ratio/low_min": 7.583070419059368e-05, "clip_ratio/region_mean": 0.002199675283918623, "epoch": 5.541107871720117, "grad_norm": 0.12185269594192505, "learning_rate": 1e-06, "loss": 0.0075, "step": 378 }, { "clip_ratio/high_max": 0.002840211265720427, "clip_ratio/high_mean": 0.0012857738220191095, "clip_ratio/low_mean": 0.0010438932349643437, "clip_ratio/low_min": 6.664845750492532e-05, "clip_ratio/region_mean": 0.0023296670260606334, "epoch": 5.550437317784256, "grad_norm": 0.11564303934574127, "learning_rate": 1e-06, "loss": -0.0389, "step": 379 }, { "clip_ratio/high_max": 0.002514765234082006, "clip_ratio/high_mean": 0.001148274626757484, "clip_ratio/low_mean": 0.001131675027863821, "clip_ratio/low_min": 0.00013708053484151606, "clip_ratio/region_mean": 0.0022799496437073685, "epoch": 5.559766763848397, "grad_norm": 0.10961645841598511, "learning_rate": 1e-06, "loss": -0.0063, "step": 380 }, { "clip_ratio/high_max": 0.002365084528719308, "clip_ratio/high_mean": 0.0011176858915860066, "clip_ratio/low_mean": 0.0011285935797786806, "clip_ratio/low_min": 9.97857105176081e-05, "clip_ratio/region_mean": 0.0022462794513558038, "epoch": 5.569096209912536, "grad_norm": 0.1276141107082367, "learning_rate": 1e-06, "loss": 0.0246, "step": 381 }, { "clip_ratio/high_max": 0.0022333588131004944, "clip_ratio/high_mean": 0.0011299518828309374, "clip_ratio/low_mean": 0.0011652440462057712, "clip_ratio/low_min": 7.245335291372612e-05, "clip_ratio/region_mean": 0.002295195899932878, "epoch": 5.578425655976677, "grad_norm": 0.11832830309867859, "learning_rate": 1e-06, "loss": -0.0126, "step": 382 }, { "clip_ratio/high_max": 0.002499848931620363, "clip_ratio/high_mean": 0.0011582183506106958, "clip_ratio/low_mean": 0.0011567379515327048, "clip_ratio/low_min": 7.030448614386842e-05, "clip_ratio/region_mean": 0.002314956385816913, "epoch": 5.587755102040816, "grad_norm": 0.12387925386428833, "learning_rate": 1e-06, "loss": 0.0019, "step": 383 }, { "clip_ratio/high_max": 0.003059231021325104, "clip_ratio/high_mean": 0.0011537707687239163, "clip_ratio/low_mean": 0.0012203647420392372, "clip_ratio/low_min": 9.396314089826774e-05, "clip_ratio/region_mean": 0.002374135481659323, "epoch": 5.597084548104956, "grad_norm": 0.1167876198887825, "learning_rate": 1e-06, "loss": 0.007, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.026593889508928603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 657.3092041015625, "completions/mean_terminated_length": 563.3626708984375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 6.0093294460641395, "grad_norm": 0.13324810564517975, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 260918718.0, "reward": 0.5862165689468384, "reward_std": 0.19810469448566437, "rewards/simpleverify_reward/mean": 0.5862165093421936, "rewards/simpleverify_reward/std": 0.4925149381160736, "step": 385 }, { "clip_ratio/high_max": 0.0019730062267626636, "clip_ratio/high_mean": 0.0008151712245307863, "clip_ratio/low_mean": 0.0005951927241767407, "clip_ratio/low_min": 2.9049500881228596e-05, "clip_ratio/region_mean": 0.0014103639587119687, "epoch": 6.01865889212828, "grad_norm": 0.12404173612594604, "learning_rate": 1e-06, "loss": 0.027, "step": 386 }, { "clip_ratio/high_max": 0.0018699504144024104, "clip_ratio/high_mean": 0.0007915903934190283, "clip_ratio/low_mean": 0.0006663777894573286, "clip_ratio/low_min": 2.9187213840486947e-05, "clip_ratio/region_mean": 0.001457968191971304, "epoch": 6.0279883381924195, "grad_norm": 0.1162559986114502, "learning_rate": 1e-06, "loss": 0.0099, "step": 387 }, { "clip_ratio/high_max": 0.002215134503785521, "clip_ratio/high_mean": 0.000869595336553175, "clip_ratio/low_mean": 0.0007131926340662176, "clip_ratio/low_min": 3.1063616916071624e-05, "clip_ratio/region_mean": 0.0015827879433345515, "epoch": 6.03731778425656, "grad_norm": 0.13062424957752228, "learning_rate": 1e-06, "loss": 0.0404, "step": 388 }, { "clip_ratio/high_max": 0.002217354194726795, "clip_ratio/high_mean": 0.000930943337152712, "clip_ratio/low_mean": 0.0006345977781165857, "clip_ratio/low_min": 2.1999296222929843e-05, "clip_ratio/region_mean": 0.0015655411334591918, "epoch": 6.0466472303206995, "grad_norm": 0.1238541230559349, "learning_rate": 1e-06, "loss": -0.0008, "step": 389 }, { "clip_ratio/high_max": 0.0020582657016348094, "clip_ratio/high_mean": 0.0009012167392938863, "clip_ratio/low_mean": 0.0008245883491326822, "clip_ratio/low_min": 3.943782940041274e-05, "clip_ratio/region_mean": 0.0017258051193493884, "epoch": 6.05597667638484, "grad_norm": 0.12203321605920792, "learning_rate": 1e-06, "loss": 0.0155, "step": 390 }, { "clip_ratio/high_max": 0.001856033326475881, "clip_ratio/high_mean": 0.0008223900895245606, "clip_ratio/low_mean": 0.0007892270768934395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016116171536850743, "epoch": 6.0653061224489795, "grad_norm": 0.1219545230269432, "learning_rate": 1e-06, "loss": -0.0036, "step": 391 }, { "clip_ratio/high_max": 0.0019292017168481834, "clip_ratio/high_mean": 0.0008448498720099451, "clip_ratio/low_mean": 0.0008102485962808714, "clip_ratio/low_min": 3.42438888765173e-05, "clip_ratio/region_mean": 0.0016550984946661629, "epoch": 6.07463556851312, "grad_norm": 0.1314750462770462, "learning_rate": 1e-06, "loss": 0.0492, "step": 392 }, { "clip_ratio/high_max": 0.0020054995693499222, "clip_ratio/high_mean": 0.0009038643911480904, "clip_ratio/low_mean": 0.0008857637440087274, "clip_ratio/low_min": 4.695428560808068e-05, "clip_ratio/region_mean": 0.0017896280623972416, "epoch": 6.0839650145772595, "grad_norm": 0.11901679635047913, "learning_rate": 1e-06, "loss": -0.0027, "step": 393 }, { "clip_ratio/high_max": 0.0021379522295319475, "clip_ratio/high_mean": 0.0009289547087973915, "clip_ratio/low_mean": 0.0008250554019468836, "clip_ratio/low_min": 3.163425935781561e-05, "clip_ratio/region_mean": 0.0017540101398481056, "epoch": 6.093294460641399, "grad_norm": 0.1270415186882019, "learning_rate": 1e-06, "loss": 0.0566, "step": 394 }, { "clip_ratio/high_max": 0.002225495525635779, "clip_ratio/high_mean": 0.000858900921230088, "clip_ratio/low_mean": 0.0009488018295087386, "clip_ratio/low_min": 9.476575269218301e-05, "clip_ratio/region_mean": 0.0018077027561957948, "epoch": 6.1026239067055394, "grad_norm": 0.11928771436214447, "learning_rate": 1e-06, "loss": 0.0186, "step": 395 }, { "clip_ratio/high_max": 0.002256974628835451, "clip_ratio/high_mean": 0.0009545545817672973, "clip_ratio/low_mean": 0.0009311052472185111, "clip_ratio/low_min": 6.442429730668664e-05, "clip_ratio/region_mean": 0.0018856598253478296, "epoch": 6.111953352769679, "grad_norm": 0.12399211525917053, "learning_rate": 1e-06, "loss": 0.0461, "step": 396 }, { "clip_ratio/high_max": 0.0022681606715195812, "clip_ratio/high_mean": 0.0008418467168667121, "clip_ratio/low_mean": 0.0008683422320245882, "clip_ratio/low_min": 9.498564031673595e-05, "clip_ratio/region_mean": 0.0017101889461628161, "epoch": 6.121282798833819, "grad_norm": 0.12473439425230026, "learning_rate": 1e-06, "loss": -0.0112, "step": 397 }, { "clip_ratio/high_max": 0.002651781542226672, "clip_ratio/high_mean": 0.0010984279942931607, "clip_ratio/low_mean": 0.000939944467972964, "clip_ratio/low_min": 6.169328571559163e-05, "clip_ratio/region_mean": 0.002038372454990167, "epoch": 6.130612244897959, "grad_norm": 0.11931362003087997, "learning_rate": 1e-06, "loss": -0.0002, "step": 398 }, { "clip_ratio/high_max": 0.0026233702956233174, "clip_ratio/high_mean": 0.0011405270706745796, "clip_ratio/low_mean": 0.0007884775477577932, "clip_ratio/low_min": 1.4172335795592517e-05, "clip_ratio/region_mean": 0.0019290045893285424, "epoch": 6.139941690962099, "grad_norm": 0.12697699666023254, "learning_rate": 1e-06, "loss": -0.0166, "step": 399 }, { "clip_ratio/high_max": 0.0026444002796779387, "clip_ratio/high_mean": 0.0010543804346525576, "clip_ratio/low_mean": 0.0007893318888818612, "clip_ratio/low_min": 5.639423034153879e-05, "clip_ratio/region_mean": 0.0018437123580952175, "epoch": 6.149271137026239, "grad_norm": 0.11611927300691605, "learning_rate": 1e-06, "loss": -0.0209, "step": 400 }, { "clip_ratio/high_max": 0.0024569192646595184, "clip_ratio/high_mean": 0.0009570927404638496, "clip_ratio/low_mean": 0.0009184736973111285, "clip_ratio/low_min": 2.44923039645073e-05, "clip_ratio/region_mean": 0.0018755664132186212, "epoch": 6.158600583090379, "grad_norm": 0.11482306569814682, "learning_rate": 1e-06, "loss": 0.0178, "step": 401 }, { "clip_ratio/high_max": 0.002464692723151529, "clip_ratio/high_mean": 0.000978390735326684, "clip_ratio/low_mean": 0.0007694177529629087, "clip_ratio/low_min": 6.687936092930613e-05, "clip_ratio/region_mean": 0.0017478084337199107, "epoch": 6.167930029154519, "grad_norm": 0.12404350936412811, "learning_rate": 1e-06, "loss": -0.0061, "step": 402 }, { "clip_ratio/high_max": 0.0025488484316156246, "clip_ratio/high_mean": 0.0010794915397127625, "clip_ratio/low_mean": 0.0009122902974922908, "clip_ratio/low_min": 5.434190825326368e-05, "clip_ratio/region_mean": 0.001991781849937979, "epoch": 6.1772594752186585, "grad_norm": 0.12119971960783005, "learning_rate": 1e-06, "loss": -0.017, "step": 403 }, { "clip_ratio/high_max": 0.0026436956832185388, "clip_ratio/high_mean": 0.0011907307889487129, "clip_ratio/low_mean": 0.0009121412922468153, "clip_ratio/low_min": 8.524704844603548e-05, "clip_ratio/region_mean": 0.0021028720948379487, "epoch": 6.186588921282799, "grad_norm": 0.1308881640434265, "learning_rate": 1e-06, "loss": -0.0215, "step": 404 }, { "clip_ratio/high_max": 0.002429325701086782, "clip_ratio/high_mean": 0.001083399933122564, "clip_ratio/low_mean": 0.001123972426285036, "clip_ratio/low_min": 6.25228767603403e-05, "clip_ratio/region_mean": 0.0022073723521316424, "epoch": 6.1959183673469385, "grad_norm": 0.1356833130121231, "learning_rate": 1e-06, "loss": 0.0154, "step": 405 }, { "clip_ratio/high_max": 0.002275712904520333, "clip_ratio/high_mean": 0.0009847798719420098, "clip_ratio/low_mean": 0.0008147502612700919, "clip_ratio/low_min": 3.122023645119043e-05, "clip_ratio/region_mean": 0.001799530153220985, "epoch": 6.205247813411079, "grad_norm": 0.11942654848098755, "learning_rate": 1e-06, "loss": -0.0519, "step": 406 }, { "clip_ratio/high_max": 0.0022498919279314578, "clip_ratio/high_mean": 0.0010205811140622245, "clip_ratio/low_mean": 0.001104500805013231, "clip_ratio/low_min": 9.362457240058575e-05, "clip_ratio/region_mean": 0.0021250819190754555, "epoch": 6.214577259475218, "grad_norm": 0.12094379961490631, "learning_rate": 1e-06, "loss": -0.0155, "step": 407 }, { "clip_ratio/high_max": 0.002205601616878994, "clip_ratio/high_mean": 0.0009364961479150224, "clip_ratio/low_mean": 0.0011847007408505306, "clip_ratio/low_min": 0.0001823055117711192, "clip_ratio/region_mean": 0.002121196906955447, "epoch": 6.223906705539359, "grad_norm": 0.12259498238563538, "learning_rate": 1e-06, "loss": 0.0451, "step": 408 }, { "clip_ratio/high_max": 0.0026138292960240506, "clip_ratio/high_mean": 0.001239790661202278, "clip_ratio/low_mean": 0.0012175131996627897, "clip_ratio/low_min": 0.00011217290102649713, "clip_ratio/region_mean": 0.002457303839037195, "epoch": 6.233236151603498, "grad_norm": 0.12996727228164673, "learning_rate": 1e-06, "loss": -0.029, "step": 409 }, { "clip_ratio/high_max": 0.00241059968539048, "clip_ratio/high_mean": 0.0010214820522378432, "clip_ratio/low_mean": 0.001148920736341097, "clip_ratio/low_min": 0.00026637545033736387, "clip_ratio/region_mean": 0.0021704028185922652, "epoch": 6.242565597667639, "grad_norm": 0.12623687088489532, "learning_rate": 1e-06, "loss": 0.0163, "step": 410 }, { "clip_ratio/high_max": 0.0021047641457698774, "clip_ratio/high_mean": 0.0008414694184466498, "clip_ratio/low_mean": 0.0011207731076865457, "clip_ratio/low_min": 0.00013071524972474435, "clip_ratio/region_mean": 0.001962242524314206, "epoch": 6.251895043731778, "grad_norm": 0.11063150316476822, "learning_rate": 1e-06, "loss": 0.0377, "step": 411 }, { "clip_ratio/high_max": 0.002761717958492227, "clip_ratio/high_mean": 0.0011835738114314154, "clip_ratio/low_mean": 0.001144818448665319, "clip_ratio/low_min": 0.00010819083581736777, "clip_ratio/region_mean": 0.002328392227354925, "epoch": 6.261224489795918, "grad_norm": 0.1341608613729477, "learning_rate": 1e-06, "loss": -0.0096, "step": 412 }, { "clip_ratio/high_max": 0.0023905458001536317, "clip_ratio/high_mean": 0.0010264829361403827, "clip_ratio/low_mean": 0.0011098397117166314, "clip_ratio/low_min": 1.5318628356908448e-05, "clip_ratio/region_mean": 0.0021363226769608445, "epoch": 6.270553935860058, "grad_norm": 0.10965313017368317, "learning_rate": 1e-06, "loss": 0.0478, "step": 413 }, { "clip_ratio/high_max": 0.0024112718856486026, "clip_ratio/high_mean": 0.0010530406416364713, "clip_ratio/low_mean": 0.0011679758717946243, "clip_ratio/low_min": 7.482237742806319e-05, "clip_ratio/region_mean": 0.0022210165043361485, "epoch": 6.279883381924198, "grad_norm": 0.12129051983356476, "learning_rate": 1e-06, "loss": -0.0074, "step": 414 }, { "clip_ratio/high_max": 0.0027557728462852538, "clip_ratio/high_mean": 0.001191336185002001, "clip_ratio/low_mean": 0.0011624247908912366, "clip_ratio/low_min": 8.862009144650074e-05, "clip_ratio/region_mean": 0.0023537609740742482, "epoch": 6.289212827988338, "grad_norm": 0.11545326560735703, "learning_rate": 1e-06, "loss": -0.0171, "step": 415 }, { "clip_ratio/high_max": 0.002751666062977165, "clip_ratio/high_mean": 0.0011539407842064975, "clip_ratio/low_mean": 0.0011127577636216301, "clip_ratio/low_min": 9.093844164453913e-05, "clip_ratio/region_mean": 0.002266698553285096, "epoch": 6.298542274052478, "grad_norm": 0.1203823983669281, "learning_rate": 1e-06, "loss": -0.0383, "step": 416 }, { "clip_ratio/high_max": 0.002638985257362947, "clip_ratio/high_mean": 0.0011841504056064878, "clip_ratio/low_mean": 0.0010946346446871758, "clip_ratio/low_min": 7.127127719286364e-05, "clip_ratio/region_mean": 0.0022787850175518543, "epoch": 6.307871720116618, "grad_norm": 0.12271475046873093, "learning_rate": 1e-06, "loss": -0.0433, "step": 417 }, { "clip_ratio/high_max": 0.0027284378520562313, "clip_ratio/high_mean": 0.0012587599994731136, "clip_ratio/low_mean": 0.0010464410588610917, "clip_ratio/low_min": 7.53785625420278e-05, "clip_ratio/region_mean": 0.0023052010001265444, "epoch": 6.317201166180758, "grad_norm": 0.12796494364738464, "learning_rate": 1e-06, "loss": -0.0071, "step": 418 }, { "clip_ratio/high_max": 0.0026533188938628882, "clip_ratio/high_mean": 0.0010761897137854248, "clip_ratio/low_mean": 0.001051126469974406, "clip_ratio/low_min": 5.620411866402719e-05, "clip_ratio/region_mean": 0.0021273161764838733, "epoch": 6.326530612244898, "grad_norm": 0.11977878212928772, "learning_rate": 1e-06, "loss": -0.012, "step": 419 }, { "clip_ratio/high_max": 0.002444995494442992, "clip_ratio/high_mean": 0.0010190890898229554, "clip_ratio/low_mean": 0.0011028128428733908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021219019254203886, "epoch": 6.335860058309038, "grad_norm": 0.12461034208536148, "learning_rate": 1e-06, "loss": -0.0075, "step": 420 }, { "clip_ratio/high_max": 0.002507720455469098, "clip_ratio/high_mean": 0.0011358726369508076, "clip_ratio/low_mean": 0.0011133389180031372, "clip_ratio/low_min": 4.179173538432224e-05, "clip_ratio/region_mean": 0.0022492115676868707, "epoch": 6.345189504373177, "grad_norm": 0.12048563361167908, "learning_rate": 1e-06, "loss": -0.0056, "step": 421 }, { "clip_ratio/high_max": 0.002550481578509789, "clip_ratio/high_mean": 0.0011290810034552123, "clip_ratio/low_mean": 0.0012278427548153559, "clip_ratio/low_min": 0.00010310869220120367, "clip_ratio/region_mean": 0.002356923716433812, "epoch": 6.354518950437318, "grad_norm": 0.1191607341170311, "learning_rate": 1e-06, "loss": -0.0114, "step": 422 }, { "clip_ratio/high_max": 0.0022768615526729263, "clip_ratio/high_mean": 0.0010551393497735262, "clip_ratio/low_mean": 0.0012340135363047011, "clip_ratio/low_min": 0.000130217413243372, "clip_ratio/region_mean": 0.0022891528860782273, "epoch": 6.363848396501457, "grad_norm": 0.11609805375337601, "learning_rate": 1e-06, "loss": 0.0164, "step": 423 }, { "clip_ratio/high_max": 0.002574537407781463, "clip_ratio/high_mean": 0.0011344671584083699, "clip_ratio/low_mean": 0.001283480891288491, "clip_ratio/low_min": 5.9218235037405975e-05, "clip_ratio/region_mean": 0.0024179480387829244, "epoch": 6.373177842565598, "grad_norm": 0.12159954011440277, "learning_rate": 1e-06, "loss": 0.0054, "step": 424 }, { "clip_ratio/high_max": 0.00263553905824665, "clip_ratio/high_mean": 0.0010845652286661789, "clip_ratio/low_mean": 0.0012535936548374593, "clip_ratio/low_min": 4.384953444969142e-05, "clip_ratio/region_mean": 0.0023381588180200197, "epoch": 6.382507288629737, "grad_norm": 0.11567821353673935, "learning_rate": 1e-06, "loss": 0.0074, "step": 425 }, { "clip_ratio/high_max": 0.0024593555863248184, "clip_ratio/high_mean": 0.0011447414435679093, "clip_ratio/low_mean": 0.0012152215676906053, "clip_ratio/low_min": 8.845467073115287e-05, "clip_ratio/region_mean": 0.0023599630876560695, "epoch": 6.391836734693878, "grad_norm": 0.11477188020944595, "learning_rate": 1e-06, "loss": -0.013, "step": 426 }, { "clip_ratio/high_max": 0.002639504578837659, "clip_ratio/high_mean": 0.0012066481067449786, "clip_ratio/low_mean": 0.0010413934905955102, "clip_ratio/low_min": 4.2450270484550856e-05, "clip_ratio/region_mean": 0.002248041651910171, "epoch": 6.401166180758017, "grad_norm": 0.1149083599448204, "learning_rate": 1e-06, "loss": -0.0252, "step": 427 }, { "clip_ratio/high_max": 0.0025213523040292785, "clip_ratio/high_mean": 0.0011777847539633512, "clip_ratio/low_mean": 0.0012336117506492883, "clip_ratio/low_min": 0.00012792700636055088, "clip_ratio/region_mean": 0.0024113965046126395, "epoch": 6.410495626822158, "grad_norm": 0.1340465098619461, "learning_rate": 1e-06, "loss": 0.0161, "step": 428 }, { "clip_ratio/high_max": 0.002063682539301226, "clip_ratio/high_mean": 0.0009665437473813654, "clip_ratio/low_mean": 0.001161672826128779, "clip_ratio/low_min": 9.626071005186532e-05, "clip_ratio/region_mean": 0.0021282165398588404, "epoch": 6.419825072886297, "grad_norm": 0.12817728519439697, "learning_rate": 1e-06, "loss": 0.063, "step": 429 }, { "clip_ratio/high_max": 0.0027733416500268504, "clip_ratio/high_mean": 0.0010656159720383584, "clip_ratio/low_mean": 0.0011418458598200232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002207461810030509, "epoch": 6.429154518950437, "grad_norm": 0.1197596862912178, "learning_rate": 1e-06, "loss": 0.0025, "step": 430 }, { "clip_ratio/high_max": 0.002767742520518368, "clip_ratio/high_mean": 0.0011557379566511372, "clip_ratio/low_mean": 0.0011472212318039965, "clip_ratio/low_min": 8.432554022874683e-05, "clip_ratio/region_mean": 0.0023029592193779536, "epoch": 6.438483965014577, "grad_norm": 0.11540327221155167, "learning_rate": 1e-06, "loss": 0.016, "step": 431 }, { "clip_ratio/high_max": 0.0026349278050474823, "clip_ratio/high_mean": 0.0011248408754909178, "clip_ratio/low_mean": 0.0012321955964580411, "clip_ratio/low_min": 7.574339269922348e-05, "clip_ratio/region_mean": 0.002357036486500874, "epoch": 6.447813411078717, "grad_norm": 0.11919288337230682, "learning_rate": 1e-06, "loss": -0.0046, "step": 432 }, { "clip_ratio/high_max": 0.0024311832567036618, "clip_ratio/high_mean": 0.0010826660882230499, "clip_ratio/low_mean": 0.0011767278829211136, "clip_ratio/low_min": 4.409579651110107e-05, "clip_ratio/region_mean": 0.002259393957501743, "epoch": 6.457142857142857, "grad_norm": 0.12749038636684418, "learning_rate": 1e-06, "loss": -0.0073, "step": 433 }, { "clip_ratio/high_max": 0.002881565764255356, "clip_ratio/high_mean": 0.001180740146082826, "clip_ratio/low_mean": 0.0011400093189877225, "clip_ratio/low_min": 0.00011249742601648904, "clip_ratio/region_mean": 0.002320749481441453, "epoch": 6.466472303206997, "grad_norm": 0.12400262802839279, "learning_rate": 1e-06, "loss": -0.0078, "step": 434 }, { "clip_ratio/high_max": 0.002597883423732128, "clip_ratio/high_mean": 0.001204928175866371, "clip_ratio/low_mean": 0.0011466079904494109, "clip_ratio/low_min": 1.800633799575735e-05, "clip_ratio/region_mean": 0.0023515361026511528, "epoch": 6.475801749271137, "grad_norm": 0.12447620928287506, "learning_rate": 1e-06, "loss": -0.0586, "step": 435 }, { "clip_ratio/high_max": 0.002456329522829037, "clip_ratio/high_mean": 0.0011332140529702883, "clip_ratio/low_mean": 0.0011636990839178907, "clip_ratio/low_min": 0.00010544337419560179, "clip_ratio/region_mean": 0.0022969131605350412, "epoch": 6.485131195335277, "grad_norm": 0.12708230316638947, "learning_rate": 1e-06, "loss": -0.0181, "step": 436 }, { "clip_ratio/high_max": 0.002377366407017689, "clip_ratio/high_mean": 0.001035883195072529, "clip_ratio/low_mean": 0.0014171033435559366, "clip_ratio/low_min": 8.347125458385563e-05, "clip_ratio/region_mean": 0.0024529865258955397, "epoch": 6.494460641399417, "grad_norm": 0.12132944166660309, "learning_rate": 1e-06, "loss": -0.0088, "step": 437 }, { "clip_ratio/high_max": 0.0025897861196426675, "clip_ratio/high_mean": 0.0010773708763736067, "clip_ratio/low_mean": 0.0013784209841105621, "clip_ratio/low_min": 9.457713167648762e-05, "clip_ratio/region_mean": 0.0024557918368373066, "epoch": 6.503790087463557, "grad_norm": 0.13327062129974365, "learning_rate": 1e-06, "loss": 0.0301, "step": 438 }, { "clip_ratio/high_max": 0.002631640942126978, "clip_ratio/high_mean": 0.0011027808650396764, "clip_ratio/low_mean": 0.0013612366092274897, "clip_ratio/low_min": 7.40386876714183e-05, "clip_ratio/region_mean": 0.002464017437887378, "epoch": 6.513119533527696, "grad_norm": 0.12736468017101288, "learning_rate": 1e-06, "loss": 0.0417, "step": 439 }, { "clip_ratio/high_max": 0.002668326771527063, "clip_ratio/high_mean": 0.0011093713037553243, "clip_ratio/low_mean": 0.0013399616182141472, "clip_ratio/low_min": 7.074468885548413e-05, "clip_ratio/region_mean": 0.002449332874675747, "epoch": 6.522448979591837, "grad_norm": 0.12351478636264801, "learning_rate": 1e-06, "loss": 0.0189, "step": 440 }, { "clip_ratio/high_max": 0.0026762376073747873, "clip_ratio/high_mean": 0.0011525229892868083, "clip_ratio/low_mean": 0.0010786298189486843, "clip_ratio/low_min": 0.00013715065870201215, "clip_ratio/region_mean": 0.0022311528300633654, "epoch": 6.531778425655976, "grad_norm": 0.1278199851512909, "learning_rate": 1e-06, "loss": -0.0338, "step": 441 }, { "clip_ratio/high_max": 0.0030020528502063826, "clip_ratio/high_mean": 0.00113365893412265, "clip_ratio/low_mean": 0.0010324991271772888, "clip_ratio/low_min": 0.00012030499237880576, "clip_ratio/region_mean": 0.002166158090403769, "epoch": 6.541107871720117, "grad_norm": 0.1174987331032753, "learning_rate": 1e-06, "loss": -0.0177, "step": 442 }, { "clip_ratio/high_max": 0.0030522443776135333, "clip_ratio/high_mean": 0.001245684783498291, "clip_ratio/low_mean": 0.0011783944974013139, "clip_ratio/low_min": 2.3487411453970708e-05, "clip_ratio/region_mean": 0.0024240792627097107, "epoch": 6.550437317784256, "grad_norm": 0.11356744915246964, "learning_rate": 1e-06, "loss": -0.0356, "step": 443 }, { "clip_ratio/high_max": 0.0026246298439218663, "clip_ratio/high_mean": 0.0011240662479394814, "clip_ratio/low_mean": 0.0013824096095049754, "clip_ratio/low_min": 0.0001541881611046847, "clip_ratio/region_mean": 0.0025064759101951495, "epoch": 6.559766763848397, "grad_norm": 0.11744951456785202, "learning_rate": 1e-06, "loss": 0.0368, "step": 444 }, { "clip_ratio/high_max": 0.0026455251208972186, "clip_ratio/high_mean": 0.001171026855445234, "clip_ratio/low_mean": 0.0012133766613260377, "clip_ratio/low_min": 0.00010664136061677709, "clip_ratio/region_mean": 0.002384403494943399, "epoch": 6.569096209912536, "grad_norm": 0.12163163721561432, "learning_rate": 1e-06, "loss": -0.0115, "step": 445 }, { "clip_ratio/high_max": 0.002699668490095064, "clip_ratio/high_mean": 0.001236726289789658, "clip_ratio/low_mean": 0.0011418564463383518, "clip_ratio/low_min": 8.202681965485681e-05, "clip_ratio/region_mean": 0.0023785828016116284, "epoch": 6.578425655976677, "grad_norm": 0.12173604965209961, "learning_rate": 1e-06, "loss": -0.0022, "step": 446 }, { "clip_ratio/high_max": 0.002826439398631919, "clip_ratio/high_mean": 0.0011543520049599465, "clip_ratio/low_mean": 0.0011969719380431343, "clip_ratio/low_min": 4.8110874558915384e-05, "clip_ratio/region_mean": 0.002351323935727123, "epoch": 6.587755102040816, "grad_norm": 0.12113094329833984, "learning_rate": 1e-06, "loss": -0.0158, "step": 447 }, { "clip_ratio/high_max": 0.002635091499541886, "clip_ratio/high_mean": 0.0011047649331885623, "clip_ratio/low_mean": 0.0013531399727071403, "clip_ratio/low_min": 0.00018112690850102808, "clip_ratio/region_mean": 0.002457904862239957, "epoch": 6.597084548104956, "grad_norm": 0.11818385869264603, "learning_rate": 1e-06, "loss": -0.0143, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028529575892857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 667.4026489257812, "completions/mean_terminated_length": 566.7135620117188, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 7.0093294460641395, "grad_norm": 0.12843579053878784, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 298188831.0, "reward": 0.5926164984703064, "reward_std": 0.19546960294246674, "rewards/simpleverify_reward/mean": 0.5926164984703064, "rewards/simpleverify_reward/std": 0.4913516044616699, "step": 449 }, { "clip_ratio/high_max": 0.0017942120612133294, "clip_ratio/high_mean": 0.0008397825422434835, "clip_ratio/low_mean": 0.0005558612620006897, "clip_ratio/low_min": 3.84697978006443e-05, "clip_ratio/region_mean": 0.0013956437978777103, "epoch": 7.01865889212828, "grad_norm": 0.12819872796535492, "learning_rate": 1e-06, "loss": 0.0123, "step": 450 }, { "clip_ratio/high_max": 0.0018787424378388096, "clip_ratio/high_mean": 0.0007833713934815023, "clip_ratio/low_mean": 0.0005890489737794269, "clip_ratio/low_min": 4.643387728719972e-05, "clip_ratio/region_mean": 0.0013724203672609292, "epoch": 7.0279883381924195, "grad_norm": 0.11226193606853485, "learning_rate": 1e-06, "loss": 0.0025, "step": 451 }, { "clip_ratio/high_max": 0.002255278632219415, "clip_ratio/high_mean": 0.0009132294890150661, "clip_ratio/low_mean": 0.000774587537307525, "clip_ratio/low_min": 9.933398905559443e-05, "clip_ratio/region_mean": 0.00168781699903775, "epoch": 7.03731778425656, "grad_norm": 0.13736703991889954, "learning_rate": 1e-06, "loss": 0.0151, "step": 452 }, { "clip_ratio/high_max": 0.0022158552310429513, "clip_ratio/high_mean": 0.000965459239523625, "clip_ratio/low_mean": 0.0005958840192761272, "clip_ratio/low_min": 1.1770244782383088e-05, "clip_ratio/region_mean": 0.001561343262437731, "epoch": 7.0466472303206995, "grad_norm": 0.11436965316534042, "learning_rate": 1e-06, "loss": 0.0041, "step": 453 }, { "clip_ratio/high_max": 0.0022080572525737807, "clip_ratio/high_mean": 0.0009584508152329363, "clip_ratio/low_mean": 0.0005942661882727407, "clip_ratio/low_min": 2.36720325119677e-05, "clip_ratio/region_mean": 0.0015527170253335498, "epoch": 7.05597667638484, "grad_norm": 0.10365674644708633, "learning_rate": 1e-06, "loss": -0.0316, "step": 454 }, { "clip_ratio/high_max": 0.0024227470566984266, "clip_ratio/high_mean": 0.0009210606094711693, "clip_ratio/low_mean": 0.0007715885913057718, "clip_ratio/low_min": 1.1220825399504974e-05, "clip_ratio/region_mean": 0.0016926492025959305, "epoch": 7.0653061224489795, "grad_norm": 0.13070574402809143, "learning_rate": 1e-06, "loss": 0.0291, "step": 455 }, { "clip_ratio/high_max": 0.002068601446808316, "clip_ratio/high_mean": 0.000858913339470746, "clip_ratio/low_mean": 0.0007577644046250498, "clip_ratio/low_min": 3.540270517987665e-05, "clip_ratio/region_mean": 0.001616677804122446, "epoch": 7.07463556851312, "grad_norm": 0.12022151052951813, "learning_rate": 1e-06, "loss": 0.0323, "step": 456 }, { "clip_ratio/high_max": 0.0020541218982543796, "clip_ratio/high_mean": 0.0009139200956269633, "clip_ratio/low_mean": 0.0008234897231886862, "clip_ratio/low_min": 0.00010106707668455783, "clip_ratio/region_mean": 0.0017374098533764482, "epoch": 7.0839650145772595, "grad_norm": 0.1268029659986496, "learning_rate": 1e-06, "loss": -0.0139, "step": 457 }, { "clip_ratio/high_max": 0.0017584330453246366, "clip_ratio/high_mean": 0.0008227400994655909, "clip_ratio/low_mean": 0.000893424346941174, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001716164442768786, "epoch": 7.093294460641399, "grad_norm": 0.12214168161153793, "learning_rate": 1e-06, "loss": 0.0126, "step": 458 }, { "clip_ratio/high_max": 0.0020603343364200555, "clip_ratio/high_mean": 0.0009572600629326189, "clip_ratio/low_mean": 0.0007520450217270991, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017093050919356756, "epoch": 7.1026239067055394, "grad_norm": 0.13558630645275116, "learning_rate": 1e-06, "loss": -0.0098, "step": 459 }, { "clip_ratio/high_max": 0.00220011221972527, "clip_ratio/high_mean": 0.0009486302678851644, "clip_ratio/low_mean": 0.0010386598223703913, "clip_ratio/low_min": 1.4434180229727644e-05, "clip_ratio/region_mean": 0.001987290110264439, "epoch": 7.111953352769679, "grad_norm": 0.11543698608875275, "learning_rate": 1e-06, "loss": 0.0081, "step": 460 }, { "clip_ratio/high_max": 0.002159191975806607, "clip_ratio/high_mean": 0.0010031382134911837, "clip_ratio/low_mean": 0.0009441756719752448, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019473139109322801, "epoch": 7.121282798833819, "grad_norm": 0.12990941107273102, "learning_rate": 1e-06, "loss": 0.0156, "step": 461 }, { "clip_ratio/high_max": 0.002285688191477675, "clip_ratio/high_mean": 0.0010069564887089655, "clip_ratio/low_mean": 0.0009788917122932617, "clip_ratio/low_min": 5.853668335475959e-05, "clip_ratio/region_mean": 0.0019858481900882907, "epoch": 7.130612244897959, "grad_norm": 0.12387382984161377, "learning_rate": 1e-06, "loss": 0.012, "step": 462 }, { "clip_ratio/high_max": 0.0023053531476762146, "clip_ratio/high_mean": 0.001027273538056761, "clip_ratio/low_mean": 0.0009512579526926856, "clip_ratio/low_min": 4.76672485092422e-05, "clip_ratio/region_mean": 0.001978531538043171, "epoch": 7.139941690962099, "grad_norm": 0.12219683080911636, "learning_rate": 1e-06, "loss": -0.0006, "step": 463 }, { "clip_ratio/high_max": 0.0023550383702968247, "clip_ratio/high_mean": 0.0009040625118359458, "clip_ratio/low_mean": 0.0010384545057604555, "clip_ratio/low_min": 6.813026629970409e-05, "clip_ratio/region_mean": 0.0019425170175964013, "epoch": 7.149271137026239, "grad_norm": 0.1295541226863861, "learning_rate": 1e-06, "loss": 0.0464, "step": 464 }, { "clip_ratio/high_max": 0.0028461072870413773, "clip_ratio/high_mean": 0.0012132468764320947, "clip_ratio/low_mean": 0.0009434288076590747, "clip_ratio/low_min": 7.763068970234599e-05, "clip_ratio/region_mean": 0.0021566756477113813, "epoch": 7.158600583090379, "grad_norm": 0.12018097937107086, "learning_rate": 1e-06, "loss": -0.0533, "step": 465 }, { "clip_ratio/high_max": 0.002219119167421013, "clip_ratio/high_mean": 0.0009892565085465321, "clip_ratio/low_mean": 0.0009505505477136467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019398070908209775, "epoch": 7.167930029154519, "grad_norm": 0.11691570281982422, "learning_rate": 1e-06, "loss": -0.0212, "step": 466 }, { "clip_ratio/high_max": 0.0024940534567576833, "clip_ratio/high_mean": 0.0009516580576018896, "clip_ratio/low_mean": 0.0009481899232923752, "clip_ratio/low_min": 2.4400567781412974e-05, "clip_ratio/region_mean": 0.001899847928143572, "epoch": 7.1772594752186585, "grad_norm": 0.12506066262722015, "learning_rate": 1e-06, "loss": 0.0465, "step": 467 }, { "clip_ratio/high_max": 0.002621080646349583, "clip_ratio/high_mean": 0.0011659690462693106, "clip_ratio/low_mean": 0.0008470480552205117, "clip_ratio/low_min": 2.7496033908391837e-05, "clip_ratio/region_mean": 0.0020130171105847694, "epoch": 7.186588921282799, "grad_norm": 0.1221906766295433, "learning_rate": 1e-06, "loss": -0.0348, "step": 468 }, { "clip_ratio/high_max": 0.0028215171914780512, "clip_ratio/high_mean": 0.001101287591154687, "clip_ratio/low_mean": 0.0010750167948572198, "clip_ratio/low_min": 5.850890011060983e-05, "clip_ratio/region_mean": 0.0021763044060207903, "epoch": 7.1959183673469385, "grad_norm": 0.12542860209941864, "learning_rate": 1e-06, "loss": -0.0076, "step": 469 }, { "clip_ratio/high_max": 0.002720331212913152, "clip_ratio/high_mean": 0.0012087243449059315, "clip_ratio/low_mean": 0.0007662919597350992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001975016319192946, "epoch": 7.205247813411079, "grad_norm": 0.11854458600282669, "learning_rate": 1e-06, "loss": -0.078, "step": 470 }, { "clip_ratio/high_max": 0.0022921818599570543, "clip_ratio/high_mean": 0.0010176220603170805, "clip_ratio/low_mean": 0.0010260480739816558, "clip_ratio/low_min": 5.1201528549427167e-05, "clip_ratio/region_mean": 0.0020436701088328846, "epoch": 7.214577259475218, "grad_norm": 0.11927090585231781, "learning_rate": 1e-06, "loss": -0.0112, "step": 471 }, { "clip_ratio/high_max": 0.0028679999231826514, "clip_ratio/high_mean": 0.0012150522015872411, "clip_ratio/low_mean": 0.0009577400123816915, "clip_ratio/low_min": 4.991071546101011e-05, "clip_ratio/region_mean": 0.0021727922212448902, "epoch": 7.223906705539359, "grad_norm": 0.11966941505670547, "learning_rate": 1e-06, "loss": -0.0254, "step": 472 }, { "clip_ratio/high_max": 0.00237848923279671, "clip_ratio/high_mean": 0.000964981853030622, "clip_ratio/low_mean": 0.001007246806693729, "clip_ratio/low_min": 3.565316728781909e-05, "clip_ratio/region_mean": 0.0019722286670003086, "epoch": 7.233236151603498, "grad_norm": 0.11110007017850876, "learning_rate": 1e-06, "loss": -0.0005, "step": 473 }, { "clip_ratio/high_max": 0.002525853124097921, "clip_ratio/high_mean": 0.0012086549195373664, "clip_ratio/low_mean": 0.0010972887157549849, "clip_ratio/low_min": 7.160093355196295e-05, "clip_ratio/region_mean": 0.0023059436280163936, "epoch": 7.242565597667639, "grad_norm": 0.14252908527851105, "learning_rate": 1e-06, "loss": 0.0043, "step": 474 }, { "clip_ratio/high_max": 0.0023088748566806316, "clip_ratio/high_mean": 0.001019692297631991, "clip_ratio/low_mean": 0.001102666345104808, "clip_ratio/low_min": 4.645337685360573e-05, "clip_ratio/region_mean": 0.0021223586081760004, "epoch": 7.251895043731778, "grad_norm": 0.11959218978881836, "learning_rate": 1e-06, "loss": 0.0049, "step": 475 }, { "clip_ratio/high_max": 0.002331751282326877, "clip_ratio/high_mean": 0.0010360079450038029, "clip_ratio/low_mean": 0.0011968514390900964, "clip_ratio/low_min": 0.0002064881800833973, "clip_ratio/region_mean": 0.002232859391369857, "epoch": 7.261224489795918, "grad_norm": 0.11894406378269196, "learning_rate": 1e-06, "loss": 0.0146, "step": 476 }, { "clip_ratio/high_max": 0.002595975158328656, "clip_ratio/high_mean": 0.0011571917748369742, "clip_ratio/low_mean": 0.0012506505263445433, "clip_ratio/low_min": 0.00010009974721469916, "clip_ratio/region_mean": 0.0024078422793536447, "epoch": 7.270553935860058, "grad_norm": 0.12460054457187653, "learning_rate": 1e-06, "loss": -0.0071, "step": 477 }, { "clip_ratio/high_max": 0.0028466668009059504, "clip_ratio/high_mean": 0.0011999497983197216, "clip_ratio/low_mean": 0.001092759595849202, "clip_ratio/low_min": 0.00011529072799021378, "clip_ratio/region_mean": 0.0022927094250917435, "epoch": 7.279883381924198, "grad_norm": 0.11852549016475677, "learning_rate": 1e-06, "loss": -0.0332, "step": 478 }, { "clip_ratio/high_max": 0.002350836235564202, "clip_ratio/high_mean": 0.0010577016655588523, "clip_ratio/low_mean": 0.0012045312541886233, "clip_ratio/low_min": 4.7819434257689863e-05, "clip_ratio/region_mean": 0.0022622328833676875, "epoch": 7.289212827988338, "grad_norm": 0.11749444901943207, "learning_rate": 1e-06, "loss": 0.0065, "step": 479 }, { "clip_ratio/high_max": 0.0023864907198003493, "clip_ratio/high_mean": 0.0010245456105621997, "clip_ratio/low_mean": 0.0011411018876970047, "clip_ratio/low_min": 7.487407674489077e-05, "clip_ratio/region_mean": 0.002165647536457982, "epoch": 7.298542274052478, "grad_norm": 0.11341959983110428, "learning_rate": 1e-06, "loss": -0.0399, "step": 480 }, { "clip_ratio/high_max": 0.00280179691981175, "clip_ratio/high_mean": 0.001034307469126361, "clip_ratio/low_mean": 0.0012337325679254718, "clip_ratio/low_min": 0.0002653695974004222, "clip_ratio/region_mean": 0.0022680400652461685, "epoch": 7.307871720116618, "grad_norm": 0.11593709141016006, "learning_rate": 1e-06, "loss": 0.0082, "step": 481 }, { "clip_ratio/high_max": 0.002634935633977875, "clip_ratio/high_mean": 0.0012052946658513974, "clip_ratio/low_mean": 0.0011131834107800387, "clip_ratio/low_min": 5.8690147852757946e-05, "clip_ratio/region_mean": 0.002318478080269415, "epoch": 7.317201166180758, "grad_norm": 0.15193410217761993, "learning_rate": 1e-06, "loss": -0.044, "step": 482 }, { "clip_ratio/high_max": 0.0024060449650278315, "clip_ratio/high_mean": 0.0011861616112582851, "clip_ratio/low_mean": 0.00122487666158122, "clip_ratio/low_min": 8.172049638233148e-05, "clip_ratio/region_mean": 0.0024110383092192933, "epoch": 7.326530612244898, "grad_norm": 0.12320085614919662, "learning_rate": 1e-06, "loss": -0.0204, "step": 483 }, { "clip_ratio/high_max": 0.002540133020374924, "clip_ratio/high_mean": 0.0009502205120952567, "clip_ratio/low_mean": 0.0013773989558103494, "clip_ratio/low_min": 0.00021999615364620695, "clip_ratio/region_mean": 0.0023276194187928922, "epoch": 7.335860058309038, "grad_norm": 0.11977312713861465, "learning_rate": 1e-06, "loss": 0.0406, "step": 484 }, { "clip_ratio/high_max": 0.002486824589141179, "clip_ratio/high_mean": 0.001049243288434809, "clip_ratio/low_mean": 0.0010668040977179771, "clip_ratio/low_min": 0.00011340638320689322, "clip_ratio/region_mean": 0.0021160473697818816, "epoch": 7.345189504373177, "grad_norm": 0.12062793225049973, "learning_rate": 1e-06, "loss": -0.0119, "step": 485 }, { "clip_ratio/high_max": 0.00300940345186973, "clip_ratio/high_mean": 0.001183753051009262, "clip_ratio/low_mean": 0.0011702056508511305, "clip_ratio/low_min": 4.2600540837156586e-05, "clip_ratio/region_mean": 0.002353958727326244, "epoch": 7.354518950437318, "grad_norm": 0.1330283284187317, "learning_rate": 1e-06, "loss": -0.03, "step": 486 }, { "clip_ratio/high_max": 0.0028180379085824825, "clip_ratio/high_mean": 0.0011440986272646114, "clip_ratio/low_mean": 0.0011010474700015038, "clip_ratio/low_min": 4.2307438889110927e-05, "clip_ratio/region_mean": 0.0022451460899901576, "epoch": 7.363848396501457, "grad_norm": 0.12328824400901794, "learning_rate": 1e-06, "loss": -0.0719, "step": 487 }, { "clip_ratio/high_max": 0.0022763478991691954, "clip_ratio/high_mean": 0.0009702771349111572, "clip_ratio/low_mean": 0.0012585149852384347, "clip_ratio/low_min": 0.00011112267930002417, "clip_ratio/region_mean": 0.0022287920728558674, "epoch": 7.373177842565598, "grad_norm": 0.11739755421876907, "learning_rate": 1e-06, "loss": 0.021, "step": 488 }, { "clip_ratio/high_max": 0.002318337545148097, "clip_ratio/high_mean": 0.0008995488115033368, "clip_ratio/low_mean": 0.0012490651024563704, "clip_ratio/low_min": 0.00013070694421912776, "clip_ratio/region_mean": 0.002148613908502739, "epoch": 7.382507288629737, "grad_norm": 0.12185444682836533, "learning_rate": 1e-06, "loss": 0.0232, "step": 489 }, { "clip_ratio/high_max": 0.002299207051692065, "clip_ratio/high_mean": 0.0010201399891229812, "clip_ratio/low_mean": 0.0012904405848530587, "clip_ratio/low_min": 7.079569604684366e-05, "clip_ratio/region_mean": 0.002310580624907743, "epoch": 7.391836734693878, "grad_norm": 0.13964992761611938, "learning_rate": 1e-06, "loss": 0.0224, "step": 490 }, { "clip_ratio/high_max": 0.0024046644393820316, "clip_ratio/high_mean": 0.0009953425542335026, "clip_ratio/low_mean": 0.0012848304431827273, "clip_ratio/low_min": 9.583777227817336e-05, "clip_ratio/region_mean": 0.002280172942846548, "epoch": 7.401166180758017, "grad_norm": 0.1222560703754425, "learning_rate": 1e-06, "loss": -0.0026, "step": 491 }, { "clip_ratio/high_max": 0.002676264019100927, "clip_ratio/high_mean": 0.0011157149474456673, "clip_ratio/low_mean": 0.00140094916059752, "clip_ratio/low_min": 9.902396141114878e-05, "clip_ratio/region_mean": 0.0025166641717078164, "epoch": 7.410495626822158, "grad_norm": 0.11028287559747696, "learning_rate": 1e-06, "loss": 0.0072, "step": 492 }, { "clip_ratio/high_max": 0.0022441234323196113, "clip_ratio/high_mean": 0.0008991280119516887, "clip_ratio/low_mean": 0.0012561939838633407, "clip_ratio/low_min": 7.770371576043544e-05, "clip_ratio/region_mean": 0.002155322006728966, "epoch": 7.419825072886297, "grad_norm": 0.1132696196436882, "learning_rate": 1e-06, "loss": 0.022, "step": 493 }, { "clip_ratio/high_max": 0.0025834246989688836, "clip_ratio/high_mean": 0.001081963720935164, "clip_ratio/low_mean": 0.0013424667704384774, "clip_ratio/low_min": 0.00011595202522585168, "clip_ratio/region_mean": 0.002424430502287578, "epoch": 7.429154518950437, "grad_norm": 0.11117098480463028, "learning_rate": 1e-06, "loss": 0.0305, "step": 494 }, { "clip_ratio/high_max": 0.002640688711835537, "clip_ratio/high_mean": 0.0011757747306546662, "clip_ratio/low_mean": 0.0010532915075600613, "clip_ratio/low_min": 6.40508187643718e-05, "clip_ratio/region_mean": 0.002229066281870473, "epoch": 7.438483965014577, "grad_norm": 0.12091293931007385, "learning_rate": 1e-06, "loss": -0.038, "step": 495 }, { "clip_ratio/high_max": 0.0028101488860556856, "clip_ratio/high_mean": 0.001151785887486767, "clip_ratio/low_mean": 0.0011464080089353956, "clip_ratio/low_min": 5.842746213602368e-05, "clip_ratio/region_mean": 0.002298193867318332, "epoch": 7.447813411078717, "grad_norm": 0.11562146991491318, "learning_rate": 1e-06, "loss": -0.0246, "step": 496 }, { "clip_ratio/high_max": 0.0026234120814478956, "clip_ratio/high_mean": 0.0010815629375429126, "clip_ratio/low_mean": 0.00119368602099712, "clip_ratio/low_min": 6.950084389245603e-05, "clip_ratio/region_mean": 0.002275248960359022, "epoch": 7.457142857142857, "grad_norm": 0.11595509201288223, "learning_rate": 1e-06, "loss": -0.0112, "step": 497 }, { "clip_ratio/high_max": 0.0029682919339393266, "clip_ratio/high_mean": 0.001281288899917854, "clip_ratio/low_mean": 0.0010473563852428924, "clip_ratio/low_min": 7.987911521922797e-05, "clip_ratio/region_mean": 0.0023286452633328736, "epoch": 7.466472303206997, "grad_norm": 0.13541395962238312, "learning_rate": 1e-06, "loss": 0.0254, "step": 498 }, { "clip_ratio/high_max": 0.00292497353075305, "clip_ratio/high_mean": 0.001249731027201051, "clip_ratio/low_mean": 0.0012320617788645905, "clip_ratio/low_min": 2.4895438400562853e-05, "clip_ratio/region_mean": 0.002481792827893514, "epoch": 7.475801749271137, "grad_norm": 0.12132713943719864, "learning_rate": 1e-06, "loss": 0.0039, "step": 499 }, { "clip_ratio/high_max": 0.0030203382921172306, "clip_ratio/high_mean": 0.0012031479163852055, "clip_ratio/low_mean": 0.0010051077952084597, "clip_ratio/low_min": 0.00012008448720735032, "clip_ratio/region_mean": 0.0022082556897657923, "epoch": 7.485131195335277, "grad_norm": 0.13366498053073883, "learning_rate": 1e-06, "loss": -0.0249, "step": 500 }, { "clip_ratio/high_max": 0.002366674401855562, "clip_ratio/high_mean": 0.0011580788050196134, "clip_ratio/low_mean": 0.0011073204150306992, "clip_ratio/low_min": 3.966103213315364e-05, "clip_ratio/region_mean": 0.0022653992564301006, "epoch": 7.494460641399417, "grad_norm": 0.1229439452290535, "learning_rate": 1e-06, "loss": -0.002, "step": 501 }, { "clip_ratio/high_max": 0.0026265027190675028, "clip_ratio/high_mean": 0.0010871550020965515, "clip_ratio/low_mean": 0.001378980334266089, "clip_ratio/low_min": 9.8238020655117e-05, "clip_ratio/region_mean": 0.002466135352733545, "epoch": 7.503790087463557, "grad_norm": 0.12062812596559525, "learning_rate": 1e-06, "loss": 0.0455, "step": 502 }, { "clip_ratio/high_max": 0.0027687020337907597, "clip_ratio/high_mean": 0.0010825806493812706, "clip_ratio/low_mean": 0.0011020002893928904, "clip_ratio/low_min": 5.1435548812150955e-05, "clip_ratio/region_mean": 0.0021845809460501187, "epoch": 7.513119533527696, "grad_norm": 0.11830519884824753, "learning_rate": 1e-06, "loss": -0.0148, "step": 503 }, { "clip_ratio/high_max": 0.0025952173164114356, "clip_ratio/high_mean": 0.001042579371642205, "clip_ratio/low_mean": 0.0011546709829417523, "clip_ratio/low_min": 7.714566891081631e-05, "clip_ratio/region_mean": 0.0021972503236611374, "epoch": 7.522448979591837, "grad_norm": 0.13077348470687866, "learning_rate": 1e-06, "loss": 0.0284, "step": 504 }, { "clip_ratio/high_max": 0.002429267842671834, "clip_ratio/high_mean": 0.0010484676458872855, "clip_ratio/low_mean": 0.0013895470146962907, "clip_ratio/low_min": 5.0701567488431465e-05, "clip_ratio/region_mean": 0.0024380146569455974, "epoch": 7.531778425655976, "grad_norm": 0.12644539773464203, "learning_rate": 1e-06, "loss": 0.0141, "step": 505 }, { "clip_ratio/high_max": 0.00278996788256336, "clip_ratio/high_mean": 0.0012562477604660671, "clip_ratio/low_mean": 0.0014506569168588612, "clip_ratio/low_min": 5.539823541766964e-05, "clip_ratio/region_mean": 0.0027069046191172674, "epoch": 7.541107871720117, "grad_norm": 0.13555505871772766, "learning_rate": 1e-06, "loss": 0.0127, "step": 506 }, { "clip_ratio/high_max": 0.00280632284557214, "clip_ratio/high_mean": 0.0011085463847848587, "clip_ratio/low_mean": 0.0013826628965034615, "clip_ratio/low_min": 0.00018379312132310588, "clip_ratio/region_mean": 0.0024912092194426805, "epoch": 7.550437317784256, "grad_norm": 0.12285593152046204, "learning_rate": 1e-06, "loss": -0.0052, "step": 507 }, { "clip_ratio/high_max": 0.0026030231747427024, "clip_ratio/high_mean": 0.0011438296223786892, "clip_ratio/low_mean": 0.0013101555305183865, "clip_ratio/low_min": 0.00020059679991391022, "clip_ratio/region_mean": 0.0024539851438021287, "epoch": 7.559766763848397, "grad_norm": 0.12937642633914948, "learning_rate": 1e-06, "loss": -0.0163, "step": 508 }, { "clip_ratio/high_max": 0.002509424688469153, "clip_ratio/high_mean": 0.0010707279761845712, "clip_ratio/low_mean": 0.0013914137198298704, "clip_ratio/low_min": 0.0002121368579537375, "clip_ratio/region_mean": 0.0024621416814625263, "epoch": 7.569096209912536, "grad_norm": 0.1315005123615265, "learning_rate": 1e-06, "loss": 0.0272, "step": 509 }, { "clip_ratio/high_max": 0.002701772638829425, "clip_ratio/high_mean": 0.0012115241843275726, "clip_ratio/low_mean": 0.0012911251669720514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002502649374946486, "epoch": 7.578425655976677, "grad_norm": 0.12416743487119675, "learning_rate": 1e-06, "loss": -0.0084, "step": 510 }, { "clip_ratio/high_max": 0.002634147080243565, "clip_ratio/high_mean": 0.00117255356417445, "clip_ratio/low_mean": 0.0011262230727879796, "clip_ratio/low_min": 6.877614850964164e-05, "clip_ratio/region_mean": 0.002298776584211737, "epoch": 7.587755102040816, "grad_norm": 0.11074145138263702, "learning_rate": 1e-06, "loss": -0.0347, "step": 511 }, { "clip_ratio/high_max": 0.0025929158146027476, "clip_ratio/high_mean": 0.001219710407895036, "clip_ratio/low_mean": 0.001214678086398635, "clip_ratio/low_min": 0.00012000472815998364, "clip_ratio/region_mean": 0.0024343884870177135, "epoch": 7.597084548104956, "grad_norm": 0.11801495403051376, "learning_rate": 1e-06, "loss": -0.005, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0298549107142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 664.7576904296875, "completions/mean_terminated_length": 559.1658325195312, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 8.00932944606414, "grad_norm": 0.1283450722694397, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 335048009.0, "reward": 0.601318359375, "reward_std": 0.1860881894826889, "rewards/simpleverify_reward/mean": 0.601318359375, "rewards/simpleverify_reward/std": 0.48963120579719543, "step": 513 }, { "clip_ratio/high_max": 0.002308113791514188, "clip_ratio/high_mean": 0.0008192843579308828, "clip_ratio/low_mean": 0.0005590575974565581, "clip_ratio/low_min": 2.5047583221748937e-05, "clip_ratio/region_mean": 0.0013783419344690628, "epoch": 8.018658892128279, "grad_norm": 0.1203589215874672, "learning_rate": 1e-06, "loss": -0.0165, "step": 514 }, { "clip_ratio/high_max": 0.0021161512413527817, "clip_ratio/high_mean": 0.0008925096390157705, "clip_ratio/low_mean": 0.000621844644228986, "clip_ratio/low_min": 6.628180472034728e-05, "clip_ratio/region_mean": 0.0015143542623263784, "epoch": 8.02798833819242, "grad_norm": 0.126325324177742, "learning_rate": 1e-06, "loss": 0.0157, "step": 515 }, { "clip_ratio/high_max": 0.0024138694352586754, "clip_ratio/high_mean": 0.0009235054749296978, "clip_ratio/low_mean": 0.0005920641942793736, "clip_ratio/low_min": 3.052536521863658e-05, "clip_ratio/region_mean": 0.0015155696528381668, "epoch": 8.03731778425656, "grad_norm": 0.11950349062681198, "learning_rate": 1e-06, "loss": -0.0263, "step": 516 }, { "clip_ratio/high_max": 0.0024672703802934848, "clip_ratio/high_mean": 0.0009641834167268826, "clip_ratio/low_mean": 0.0006728258522343822, "clip_ratio/low_min": 2.986503386637196e-05, "clip_ratio/region_mean": 0.0016370092198485509, "epoch": 8.0466472303207, "grad_norm": 0.11088596284389496, "learning_rate": 1e-06, "loss": 0.0094, "step": 517 }, { "clip_ratio/high_max": 0.0023653750286030117, "clip_ratio/high_mean": 0.0008969077725851093, "clip_ratio/low_mean": 0.0006713878083246527, "clip_ratio/low_min": 6.067686717869947e-05, "clip_ratio/region_mean": 0.0015682956000091508, "epoch": 8.055976676384839, "grad_norm": 0.1267935335636139, "learning_rate": 1e-06, "loss": -0.0151, "step": 518 }, { "clip_ratio/high_max": 0.002127373474650085, "clip_ratio/high_mean": 0.0008364786281163106, "clip_ratio/low_mean": 0.000634467440249864, "clip_ratio/low_min": 6.502617270598421e-05, "clip_ratio/region_mean": 0.0014709460447193123, "epoch": 8.06530612244898, "grad_norm": 0.13391147553920746, "learning_rate": 1e-06, "loss": -0.0103, "step": 519 }, { "clip_ratio/high_max": 0.0022948138357605785, "clip_ratio/high_mean": 0.001019012230244698, "clip_ratio/low_mean": 0.000763603351515485, "clip_ratio/low_min": 3.301637480035424e-05, "clip_ratio/region_mean": 0.001782615545380395, "epoch": 8.07463556851312, "grad_norm": 0.12723638117313385, "learning_rate": 1e-06, "loss": -0.0179, "step": 520 }, { "clip_ratio/high_max": 0.0023748731291561853, "clip_ratio/high_mean": 0.0009810123592615128, "clip_ratio/low_mean": 0.0009004772473417688, "clip_ratio/low_min": 8.037665793381166e-05, "clip_ratio/region_mean": 0.0018814895738614723, "epoch": 8.08396501457726, "grad_norm": 0.1290731132030487, "learning_rate": 1e-06, "loss": 0.0121, "step": 521 }, { "clip_ratio/high_max": 0.002102063983329572, "clip_ratio/high_mean": 0.0009693754291220102, "clip_ratio/low_mean": 0.0007923240900709061, "clip_ratio/low_min": 4.505354900175007e-05, "clip_ratio/region_mean": 0.0017616995246498846, "epoch": 8.093294460641399, "grad_norm": 0.12019843608140945, "learning_rate": 1e-06, "loss": 0.0361, "step": 522 }, { "clip_ratio/high_max": 0.0025198195362463593, "clip_ratio/high_mean": 0.0010217487033514772, "clip_ratio/low_mean": 0.0008695691230968805, "clip_ratio/low_min": 6.385423421306768e-05, "clip_ratio/region_mean": 0.0018913177846116014, "epoch": 8.102623906705539, "grad_norm": 0.11757130920886993, "learning_rate": 1e-06, "loss": -0.0083, "step": 523 }, { "clip_ratio/high_max": 0.0026558830504654907, "clip_ratio/high_mean": 0.0010986463385052048, "clip_ratio/low_mean": 0.0007407835273625096, "clip_ratio/low_min": 5.301088822307065e-05, "clip_ratio/region_mean": 0.001839429882238619, "epoch": 8.11195335276968, "grad_norm": 0.1299556940793991, "learning_rate": 1e-06, "loss": -0.0375, "step": 524 }, { "clip_ratio/high_max": 0.0027868784964084625, "clip_ratio/high_mean": 0.0012555852845252957, "clip_ratio/low_mean": 0.0009408311379957013, "clip_ratio/low_min": 8.384651482629124e-05, "clip_ratio/region_mean": 0.002196416440710891, "epoch": 8.12128279883382, "grad_norm": 0.20711393654346466, "learning_rate": 1e-06, "loss": -0.0083, "step": 525 }, { "clip_ratio/high_max": 0.0025017392545123585, "clip_ratio/high_mean": 0.0010335601491533453, "clip_ratio/low_mean": 0.000903732990991557, "clip_ratio/low_min": 5.938757021795027e-05, "clip_ratio/region_mean": 0.0019372931201360188, "epoch": 8.130612244897959, "grad_norm": 0.13137264549732208, "learning_rate": 1e-06, "loss": -0.0095, "step": 526 }, { "clip_ratio/high_max": 0.002261149580590427, "clip_ratio/high_mean": 0.0009380212159157963, "clip_ratio/low_mean": 0.0009593696158844978, "clip_ratio/low_min": 0.00012619020617421484, "clip_ratio/region_mean": 0.0018973908154293895, "epoch": 8.139941690962099, "grad_norm": 0.12289122492074966, "learning_rate": 1e-06, "loss": -0.02, "step": 527 }, { "clip_ratio/high_max": 0.0023602232613484375, "clip_ratio/high_mean": 0.0009294587598560611, "clip_ratio/low_mean": 0.0008977753086583107, "clip_ratio/low_min": 6.272373320825864e-05, "clip_ratio/region_mean": 0.0018272340821567923, "epoch": 8.14927113702624, "grad_norm": 0.11843208223581314, "learning_rate": 1e-06, "loss": 0.0021, "step": 528 }, { "clip_ratio/high_max": 0.0027792146502179094, "clip_ratio/high_mean": 0.0011187990567123052, "clip_ratio/low_mean": 0.0010161497466469882, "clip_ratio/low_min": 8.624043675808934e-05, "clip_ratio/region_mean": 0.002134948816092219, "epoch": 8.15860058309038, "grad_norm": 0.14305076003074646, "learning_rate": 1e-06, "loss": 0.0006, "step": 529 }, { "clip_ratio/high_max": 0.002307569797267206, "clip_ratio/high_mean": 0.0009824995304370532, "clip_ratio/low_mean": 0.0009696361557871569, "clip_ratio/low_min": 8.904736841941485e-05, "clip_ratio/region_mean": 0.0019521356298355386, "epoch": 8.167930029154519, "grad_norm": 0.11763215065002441, "learning_rate": 1e-06, "loss": -0.0178, "step": 530 }, { "clip_ratio/high_max": 0.002417347248410806, "clip_ratio/high_mean": 0.0010127573004865553, "clip_ratio/low_mean": 0.0010786624043248594, "clip_ratio/low_min": 6.551867681992007e-05, "clip_ratio/region_mean": 0.002091419737553224, "epoch": 8.177259475218658, "grad_norm": 0.12113494426012039, "learning_rate": 1e-06, "loss": -0.0008, "step": 531 }, { "clip_ratio/high_max": 0.0026721061367425136, "clip_ratio/high_mean": 0.001091474034183193, "clip_ratio/low_mean": 0.0009800746793189319, "clip_ratio/low_min": 6.606509850826114e-05, "clip_ratio/region_mean": 0.0020715487116831355, "epoch": 8.186588921282798, "grad_norm": 0.12066452950239182, "learning_rate": 1e-06, "loss": -0.0374, "step": 532 }, { "clip_ratio/high_max": 0.0024120587113429792, "clip_ratio/high_mean": 0.0009337450119346613, "clip_ratio/low_mean": 0.0010737554603110766, "clip_ratio/low_min": 0.00010615810242597945, "clip_ratio/region_mean": 0.0020075004940736108, "epoch": 8.19591836734694, "grad_norm": 0.12330883741378784, "learning_rate": 1e-06, "loss": 0.0088, "step": 533 }, { "clip_ratio/high_max": 0.0024619104879093356, "clip_ratio/high_mean": 0.0009373362427140819, "clip_ratio/low_mean": 0.001089069301087875, "clip_ratio/low_min": 0.00020098720142414095, "clip_ratio/region_mean": 0.0020264055565348826, "epoch": 8.205247813411079, "grad_norm": 0.12495143711566925, "learning_rate": 1e-06, "loss": 0.0387, "step": 534 }, { "clip_ratio/high_max": 0.0021820884139742702, "clip_ratio/high_mean": 0.001043860804202268, "clip_ratio/low_mean": 0.001224691775860265, "clip_ratio/low_min": 8.937551865528803e-05, "clip_ratio/region_mean": 0.0022685526782879606, "epoch": 8.214577259475218, "grad_norm": 0.12983602285385132, "learning_rate": 1e-06, "loss": -0.0295, "step": 535 }, { "clip_ratio/high_max": 0.0024420370391453616, "clip_ratio/high_mean": 0.0009962133681256091, "clip_ratio/low_mean": 0.0011644633668765891, "clip_ratio/low_min": 4.2788336031662766e-05, "clip_ratio/region_mean": 0.002160676784114912, "epoch": 8.223906705539358, "grad_norm": 0.12035165727138519, "learning_rate": 1e-06, "loss": 0.0106, "step": 536 }, { "clip_ratio/high_max": 0.0026347746461397037, "clip_ratio/high_mean": 0.0010284888685418991, "clip_ratio/low_mean": 0.001169606843177462, "clip_ratio/low_min": 6.066674359317403e-05, "clip_ratio/region_mean": 0.002198095760832075, "epoch": 8.2332361516035, "grad_norm": 0.11909116059541702, "learning_rate": 1e-06, "loss": -0.0095, "step": 537 }, { "clip_ratio/high_max": 0.002415731898508966, "clip_ratio/high_mean": 0.0009782180168258492, "clip_ratio/low_mean": 0.0012490935987443663, "clip_ratio/low_min": 7.025418744888157e-05, "clip_ratio/region_mean": 0.002227311626484152, "epoch": 8.242565597667639, "grad_norm": 0.11141186952590942, "learning_rate": 1e-06, "loss": 0.0441, "step": 538 }, { "clip_ratio/high_max": 0.003209587899618782, "clip_ratio/high_mean": 0.0011492324210848892, "clip_ratio/low_mean": 0.0010563824962446233, "clip_ratio/low_min": 7.435286534018815e-05, "clip_ratio/region_mean": 0.002205614946433343, "epoch": 8.251895043731778, "grad_norm": 0.12210160493850708, "learning_rate": 1e-06, "loss": -0.0027, "step": 539 }, { "clip_ratio/high_max": 0.0024030416971072555, "clip_ratio/high_mean": 0.0010791882777994033, "clip_ratio/low_mean": 0.0011722274102794472, "clip_ratio/low_min": 7.202471897471696e-05, "clip_ratio/region_mean": 0.002251415731734596, "epoch": 8.261224489795918, "grad_norm": 0.11297818273305893, "learning_rate": 1e-06, "loss": -0.002, "step": 540 }, { "clip_ratio/high_max": 0.0027120920858578756, "clip_ratio/high_mean": 0.0010439105390105397, "clip_ratio/low_mean": 0.0011526052039698698, "clip_ratio/low_min": 0.00010454573930473998, "clip_ratio/region_mean": 0.002196515735704452, "epoch": 8.270553935860057, "grad_norm": 0.1224856972694397, "learning_rate": 1e-06, "loss": 0.0208, "step": 541 }, { "clip_ratio/high_max": 0.0023931062205519993, "clip_ratio/high_mean": 0.0009649040293879807, "clip_ratio/low_mean": 0.001129512344050454, "clip_ratio/low_min": 7.973248648340814e-05, "clip_ratio/region_mean": 0.0020944163552485406, "epoch": 8.279883381924199, "grad_norm": 0.11838693171739578, "learning_rate": 1e-06, "loss": 0.0019, "step": 542 }, { "clip_ratio/high_max": 0.0024988237855723128, "clip_ratio/high_mean": 0.0010949193238047883, "clip_ratio/low_mean": 0.0010932493860309478, "clip_ratio/low_min": 4.4433726543502416e-05, "clip_ratio/region_mean": 0.002188168669817969, "epoch": 8.289212827988338, "grad_norm": 0.13246051967144012, "learning_rate": 1e-06, "loss": 0.0327, "step": 543 }, { "clip_ratio/high_max": 0.002739011964877136, "clip_ratio/high_mean": 0.0009972331954486435, "clip_ratio/low_mean": 0.0011938736060983501, "clip_ratio/low_min": 2.8269846552575473e-05, "clip_ratio/region_mean": 0.0021911067960900255, "epoch": 8.298542274052478, "grad_norm": 0.1231093630194664, "learning_rate": 1e-06, "loss": 0.0309, "step": 544 }, { "clip_ratio/high_max": 0.0027517258749867324, "clip_ratio/high_mean": 0.0012929725635331124, "clip_ratio/low_mean": 0.0010480324508534977, "clip_ratio/low_min": 8.57794439070858e-05, "clip_ratio/region_mean": 0.002341005005291663, "epoch": 8.307871720116617, "grad_norm": 0.1205020323395729, "learning_rate": 1e-06, "loss": -0.0242, "step": 545 }, { "clip_ratio/high_max": 0.00230911258404376, "clip_ratio/high_mean": 0.0010238617669529049, "clip_ratio/low_mean": 0.0012371332541079028, "clip_ratio/low_min": 0.00018028656995738856, "clip_ratio/region_mean": 0.0022609949519392103, "epoch": 8.317201166180759, "grad_norm": 0.12609079480171204, "learning_rate": 1e-06, "loss": 0.0361, "step": 546 }, { "clip_ratio/high_max": 0.0022343277887557633, "clip_ratio/high_mean": 0.0010184749244217528, "clip_ratio/low_mean": 0.0011197592430107761, "clip_ratio/low_min": 6.587505777133629e-05, "clip_ratio/region_mean": 0.0021382341801654547, "epoch": 8.326530612244898, "grad_norm": 0.1255784034729004, "learning_rate": 1e-06, "loss": -0.0124, "step": 547 }, { "clip_ratio/high_max": 0.0024673875923326705, "clip_ratio/high_mean": 0.0009784684116311837, "clip_ratio/low_mean": 0.0012355259750620462, "clip_ratio/low_min": 9.116447290580254e-05, "clip_ratio/region_mean": 0.002213994404883124, "epoch": 8.335860058309038, "grad_norm": 0.11374979466199875, "learning_rate": 1e-06, "loss": 0.0421, "step": 548 }, { "clip_ratio/high_max": 0.0028497118619270623, "clip_ratio/high_mean": 0.0011652501452772412, "clip_ratio/low_mean": 0.0012390206211421173, "clip_ratio/low_min": 4.902833552478114e-05, "clip_ratio/region_mean": 0.0024042707809712738, "epoch": 8.345189504373177, "grad_norm": 0.12544985115528107, "learning_rate": 1e-06, "loss": -0.004, "step": 549 }, { "clip_ratio/high_max": 0.002698931406484917, "clip_ratio/high_mean": 0.0012142670348112006, "clip_ratio/low_mean": 0.0011726453540177317, "clip_ratio/low_min": 1.7424030374968424e-05, "clip_ratio/region_mean": 0.002386912368820049, "epoch": 8.354518950437317, "grad_norm": 0.12661287188529968, "learning_rate": 1e-06, "loss": -0.0532, "step": 550 }, { "clip_ratio/high_max": 0.002647233435709495, "clip_ratio/high_mean": 0.000994792913843412, "clip_ratio/low_mean": 0.0011252933873038273, "clip_ratio/low_min": 4.500423347053584e-05, "clip_ratio/region_mean": 0.0021200862975092605, "epoch": 8.363848396501458, "grad_norm": 0.1246127262711525, "learning_rate": 1e-06, "loss": 0.03, "step": 551 }, { "clip_ratio/high_max": 0.0030342207101057284, "clip_ratio/high_mean": 0.0012242562188475858, "clip_ratio/low_mean": 0.0011163458912051283, "clip_ratio/low_min": 0.00012498951218731236, "clip_ratio/region_mean": 0.002340602077310905, "epoch": 8.373177842565598, "grad_norm": 0.12463736534118652, "learning_rate": 1e-06, "loss": -0.0148, "step": 552 }, { "clip_ratio/high_max": 0.0024178476305678487, "clip_ratio/high_mean": 0.0011164468160131946, "clip_ratio/low_mean": 0.0011468453139968915, "clip_ratio/low_min": 7.790819927322445e-05, "clip_ratio/region_mean": 0.002263292102725245, "epoch": 8.382507288629737, "grad_norm": 0.13012540340423584, "learning_rate": 1e-06, "loss": -0.0152, "step": 553 }, { "clip_ratio/high_max": 0.0026487481663934886, "clip_ratio/high_mean": 0.0011689358070725575, "clip_ratio/low_mean": 0.0013282163308758754, "clip_ratio/low_min": 0.00016393925579905044, "clip_ratio/region_mean": 0.002497152134310454, "epoch": 8.391836734693877, "grad_norm": 0.1218249574303627, "learning_rate": 1e-06, "loss": 0.0141, "step": 554 }, { "clip_ratio/high_max": 0.002430065687804017, "clip_ratio/high_mean": 0.001079453075362835, "clip_ratio/low_mean": 0.0011727480468834983, "clip_ratio/low_min": 8.196657290682197e-05, "clip_ratio/region_mean": 0.002252201098599471, "epoch": 8.401166180758018, "grad_norm": 0.12104137241840363, "learning_rate": 1e-06, "loss": 0.0165, "step": 555 }, { "clip_ratio/high_max": 0.0024578454613219947, "clip_ratio/high_mean": 0.0010770013213914353, "clip_ratio/low_mean": 0.001363682411465561, "clip_ratio/low_min": 0.00010510472839087015, "clip_ratio/region_mean": 0.0024406837910646573, "epoch": 8.410495626822158, "grad_norm": 0.1271679401397705, "learning_rate": 1e-06, "loss": 0.0297, "step": 556 }, { "clip_ratio/high_max": 0.0025669149981695227, "clip_ratio/high_mean": 0.0010316094776499085, "clip_ratio/low_mean": 0.0013937309922766872, "clip_ratio/low_min": 0.00010997794561262708, "clip_ratio/region_mean": 0.002425340462650638, "epoch": 8.419825072886297, "grad_norm": 0.11484405398368835, "learning_rate": 1e-06, "loss": 0.035, "step": 557 }, { "clip_ratio/high_max": 0.0028174765466246754, "clip_ratio/high_mean": 0.001194695392769063, "clip_ratio/low_mean": 0.0012002154344372684, "clip_ratio/low_min": 0.00014790886416449212, "clip_ratio/region_mean": 0.0023949107926455326, "epoch": 8.429154518950437, "grad_norm": 0.12489781528711319, "learning_rate": 1e-06, "loss": -0.0216, "step": 558 }, { "clip_ratio/high_max": 0.0020478023834584747, "clip_ratio/high_mean": 0.0009551603143336251, "clip_ratio/low_mean": 0.0013717133515456226, "clip_ratio/low_min": 0.0002706992481762427, "clip_ratio/region_mean": 0.002326873625861481, "epoch": 8.438483965014576, "grad_norm": 0.1298668384552002, "learning_rate": 1e-06, "loss": 0.0136, "step": 559 }, { "clip_ratio/high_max": 0.002336836994800251, "clip_ratio/high_mean": 0.001020310399326263, "clip_ratio/low_mean": 0.0012555011489894241, "clip_ratio/low_min": 0.00013547780417866306, "clip_ratio/region_mean": 0.0022758115374017507, "epoch": 8.447813411078718, "grad_norm": 0.11268424242734909, "learning_rate": 1e-06, "loss": -0.0434, "step": 560 }, { "clip_ratio/high_max": 0.0027585324132815003, "clip_ratio/high_mean": 0.0010997622666764073, "clip_ratio/low_mean": 0.0011211327837372664, "clip_ratio/low_min": 3.309066960355267e-05, "clip_ratio/region_mean": 0.0022208950758795254, "epoch": 8.457142857142857, "grad_norm": 0.11445292085409164, "learning_rate": 1e-06, "loss": -0.0114, "step": 561 }, { "clip_ratio/high_max": 0.0024568577791796997, "clip_ratio/high_mean": 0.0009768408581294352, "clip_ratio/low_mean": 0.001210312597322627, "clip_ratio/low_min": 0.00016917249013204128, "clip_ratio/region_mean": 0.0021871533899684437, "epoch": 8.466472303206997, "grad_norm": 0.12216100096702576, "learning_rate": 1e-06, "loss": -0.014, "step": 562 }, { "clip_ratio/high_max": 0.0026356438829679973, "clip_ratio/high_mean": 0.0011154105686728144, "clip_ratio/low_mean": 0.0012608977049239911, "clip_ratio/low_min": 8.81749529071385e-05, "clip_ratio/region_mean": 0.0023763082645018585, "epoch": 8.475801749271136, "grad_norm": 0.11466651409864426, "learning_rate": 1e-06, "loss": -0.0028, "step": 563 }, { "clip_ratio/high_max": 0.00265516534273047, "clip_ratio/high_mean": 0.001099496028473368, "clip_ratio/low_mean": 0.001142338413046673, "clip_ratio/low_min": 4.412595444591716e-05, "clip_ratio/region_mean": 0.0022418344815378077, "epoch": 8.485131195335278, "grad_norm": 0.12823975086212158, "learning_rate": 1e-06, "loss": -0.0133, "step": 564 }, { "clip_ratio/high_max": 0.002664219711732585, "clip_ratio/high_mean": 0.0011713347194017842, "clip_ratio/low_mean": 0.0010626559778756928, "clip_ratio/low_min": 6.0224479966564104e-05, "clip_ratio/region_mean": 0.002233990693639498, "epoch": 8.494460641399417, "grad_norm": 0.1139550507068634, "learning_rate": 1e-06, "loss": -0.023, "step": 565 }, { "clip_ratio/high_max": 0.0030304407991934568, "clip_ratio/high_mean": 0.0013030897425778676, "clip_ratio/low_mean": 0.0010486265764484415, "clip_ratio/low_min": 1.514050381956622e-05, "clip_ratio/region_mean": 0.002351716277189553, "epoch": 8.503790087463557, "grad_norm": 0.1180059164762497, "learning_rate": 1e-06, "loss": -0.0591, "step": 566 }, { "clip_ratio/high_max": 0.0029900972949690185, "clip_ratio/high_mean": 0.0011263616142969113, "clip_ratio/low_mean": 0.0011362304103386123, "clip_ratio/low_min": 5.1244685892015696e-05, "clip_ratio/region_mean": 0.002262592039187439, "epoch": 8.513119533527696, "grad_norm": 0.125900536775589, "learning_rate": 1e-06, "loss": -0.0057, "step": 567 }, { "clip_ratio/high_max": 0.0028118100890424103, "clip_ratio/high_mean": 0.0010995810516760685, "clip_ratio/low_mean": 0.0011372938515705755, "clip_ratio/low_min": 9.694679647509474e-05, "clip_ratio/region_mean": 0.0022368749632732943, "epoch": 8.522448979591836, "grad_norm": 0.12383028119802475, "learning_rate": 1e-06, "loss": -0.0127, "step": 568 }, { "clip_ratio/high_max": 0.002562097586633172, "clip_ratio/high_mean": 0.0011202461500943173, "clip_ratio/low_mean": 0.0011596060830925126, "clip_ratio/low_min": 3.097893568337895e-05, "clip_ratio/region_mean": 0.00227985223318683, "epoch": 8.531778425655977, "grad_norm": 0.11733996123075485, "learning_rate": 1e-06, "loss": 0.0011, "step": 569 }, { "clip_ratio/high_max": 0.0029617850013892166, "clip_ratio/high_mean": 0.0012063594731444027, "clip_ratio/low_mean": 0.001099165501727839, "clip_ratio/low_min": 0.00010354910409660079, "clip_ratio/region_mean": 0.002305525013071019, "epoch": 8.541107871720117, "grad_norm": 0.11968277394771576, "learning_rate": 1e-06, "loss": -0.0487, "step": 570 }, { "clip_ratio/high_max": 0.002759381903160829, "clip_ratio/high_mean": 0.001307042890402954, "clip_ratio/low_mean": 0.001245315230335109, "clip_ratio/low_min": 7.169775381044019e-05, "clip_ratio/region_mean": 0.0025523581716697663, "epoch": 8.550437317784256, "grad_norm": 0.12500333786010742, "learning_rate": 1e-06, "loss": 0.0054, "step": 571 }, { "clip_ratio/high_max": 0.002503732561308425, "clip_ratio/high_mean": 0.0010237629794573877, "clip_ratio/low_mean": 0.0013923473561590072, "clip_ratio/low_min": 6.724958439008333e-05, "clip_ratio/region_mean": 0.0024161103574442677, "epoch": 8.559766763848396, "grad_norm": 0.12822483479976654, "learning_rate": 1e-06, "loss": 0.0198, "step": 572 }, { "clip_ratio/high_max": 0.0029791240303893574, "clip_ratio/high_mean": 0.00127996172523126, "clip_ratio/low_mean": 0.0011200966910109855, "clip_ratio/low_min": 1.5089329281181563e-05, "clip_ratio/region_mean": 0.0024000583944143727, "epoch": 8.569096209912537, "grad_norm": 0.1319812536239624, "learning_rate": 1e-06, "loss": -0.0376, "step": 573 }, { "clip_ratio/high_max": 0.00259275332064135, "clip_ratio/high_mean": 0.0011809986899606884, "clip_ratio/low_mean": 0.0015884614949754905, "clip_ratio/low_min": 0.0001600734985913732, "clip_ratio/region_mean": 0.0027694601958501153, "epoch": 8.578425655976677, "grad_norm": 0.13094407320022583, "learning_rate": 1e-06, "loss": 0.0353, "step": 574 }, { "clip_ratio/high_max": 0.0023651821647945326, "clip_ratio/high_mean": 0.000938820505325566, "clip_ratio/low_mean": 0.001352846775262151, "clip_ratio/low_min": 6.29270625722711e-05, "clip_ratio/region_mean": 0.002291667216923088, "epoch": 8.587755102040816, "grad_norm": 0.11425367742776871, "learning_rate": 1e-06, "loss": 0.0165, "step": 575 }, { "clip_ratio/high_max": 0.002650808120961301, "clip_ratio/high_mean": 0.0010786241364257876, "clip_ratio/low_mean": 0.001199135702336207, "clip_ratio/low_min": 7.314833601412829e-05, "clip_ratio/region_mean": 0.0022777598787797615, "epoch": 8.597084548104956, "grad_norm": 0.11515863239765167, "learning_rate": 1e-06, "loss": 0.0034, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0337437220982143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 680.9325561523438, "completions/mean_terminated_length": 561.671142578125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 9.00932944606414, "grad_norm": 0.13285647332668304, "learning_rate": 1e-06, "loss": -0.0147, "num_tokens": 371932110.0, "reward": 0.6011962890625, "reward_std": 0.18452098965644836, "rewards/simpleverify_reward/mean": 0.6011962890625, "rewards/simpleverify_reward/std": 0.4896564781665802, "step": 577 }, { "clip_ratio/high_max": 0.002095008887408767, "clip_ratio/high_mean": 0.0008718671524547972, "clip_ratio/low_mean": 0.0005618682589556556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001433735407772474, "epoch": 9.018658892128279, "grad_norm": 0.13264068961143494, "learning_rate": 1e-06, "loss": 0.0151, "step": 578 }, { "clip_ratio/high_max": 0.0025229359016520903, "clip_ratio/high_mean": 0.0010117592555616284, "clip_ratio/low_mean": 0.0005826697615702869, "clip_ratio/low_min": 3.274198115832405e-05, "clip_ratio/region_mean": 0.0015944289916660637, "epoch": 9.02798833819242, "grad_norm": 0.12184672057628632, "learning_rate": 1e-06, "loss": -0.0197, "step": 579 }, { "clip_ratio/high_max": 0.002105564715748187, "clip_ratio/high_mean": 0.0008783109951764345, "clip_ratio/low_mean": 0.000537934067324386, "clip_ratio/low_min": 1.5927624190226197e-05, "clip_ratio/region_mean": 0.00141624506431981, "epoch": 9.03731778425656, "grad_norm": 0.12321247905492783, "learning_rate": 1e-06, "loss": -0.0258, "step": 580 }, { "clip_ratio/high_max": 0.0019943970619351603, "clip_ratio/high_mean": 0.0007671414259675657, "clip_ratio/low_mean": 0.0006085099275878747, "clip_ratio/low_min": 2.1079258658573963e-05, "clip_ratio/region_mean": 0.00137565133991302, "epoch": 9.0466472303207, "grad_norm": 0.11829300969839096, "learning_rate": 1e-06, "loss": 0.0153, "step": 581 }, { "clip_ratio/high_max": 0.001837521373090567, "clip_ratio/high_mean": 0.0007918200026324484, "clip_ratio/low_mean": 0.0007074921368257492, "clip_ratio/low_min": 7.013408139755484e-05, "clip_ratio/region_mean": 0.0014993121221777983, "epoch": 9.055976676384839, "grad_norm": 0.13303428888320923, "learning_rate": 1e-06, "loss": 0.0474, "step": 582 }, { "clip_ratio/high_max": 0.00226736001422978, "clip_ratio/high_mean": 0.0008523790684193955, "clip_ratio/low_mean": 0.0005297585084917955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013821375687257387, "epoch": 9.06530612244898, "grad_norm": 0.11975163966417313, "learning_rate": 1e-06, "loss": 0.0003, "step": 583 }, { "clip_ratio/high_max": 0.0024744837573962286, "clip_ratio/high_mean": 0.0009029937828017864, "clip_ratio/low_mean": 0.0006766312617401127, "clip_ratio/low_min": 3.183332773915026e-05, "clip_ratio/region_mean": 0.0015796250518178567, "epoch": 9.07463556851312, "grad_norm": 0.1264863908290863, "learning_rate": 1e-06, "loss": -0.0051, "step": 584 }, { "clip_ratio/high_max": 0.0024912997178034857, "clip_ratio/high_mean": 0.001027233785862336, "clip_ratio/low_mean": 0.0008041461042012088, "clip_ratio/low_min": 8.870780948200263e-05, "clip_ratio/region_mean": 0.0018313798718736507, "epoch": 9.08396501457726, "grad_norm": 0.13035285472869873, "learning_rate": 1e-06, "loss": -0.0025, "step": 585 }, { "clip_ratio/high_max": 0.002423805846774485, "clip_ratio/high_mean": 0.0010285060652677203, "clip_ratio/low_mean": 0.0006853322192910127, "clip_ratio/low_min": 2.946752283605747e-05, "clip_ratio/region_mean": 0.0017138382681878284, "epoch": 9.093294460641399, "grad_norm": 0.12404130399227142, "learning_rate": 1e-06, "loss": -0.0057, "step": 586 }, { "clip_ratio/high_max": 0.0024937425114330836, "clip_ratio/high_mean": 0.0009585611333022825, "clip_ratio/low_mean": 0.0007391421513602836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001697703315585386, "epoch": 9.102623906705539, "grad_norm": 0.12626686692237854, "learning_rate": 1e-06, "loss": 0.0064, "step": 587 }, { "clip_ratio/high_max": 0.0019201044015062507, "clip_ratio/high_mean": 0.0008419741425313987, "clip_ratio/low_mean": 0.0008389076047023991, "clip_ratio/low_min": 4.0875421291275416e-05, "clip_ratio/region_mean": 0.001680881716310978, "epoch": 9.11195335276968, "grad_norm": 0.1348470002412796, "learning_rate": 1e-06, "loss": 0.0212, "step": 588 }, { "clip_ratio/high_max": 0.0023662338862777688, "clip_ratio/high_mean": 0.0009226638903783169, "clip_ratio/low_mean": 0.0008935322985053062, "clip_ratio/low_min": 8.200370484701125e-05, "clip_ratio/region_mean": 0.0018161961852456443, "epoch": 9.12128279883382, "grad_norm": 0.11456402391195297, "learning_rate": 1e-06, "loss": 0.0051, "step": 589 }, { "clip_ratio/high_max": 0.002215280426753452, "clip_ratio/high_mean": 0.0009347568302473519, "clip_ratio/low_mean": 0.0009179181470244657, "clip_ratio/low_min": 1.3326225598575547e-05, "clip_ratio/region_mean": 0.0018526749663578812, "epoch": 9.130612244897959, "grad_norm": 0.12748394906520844, "learning_rate": 1e-06, "loss": 0.0289, "step": 590 }, { "clip_ratio/high_max": 0.0024388823949266225, "clip_ratio/high_mean": 0.0010755289495136822, "clip_ratio/low_mean": 0.0009498595791228581, "clip_ratio/low_min": 2.879520798160229e-05, "clip_ratio/region_mean": 0.0020253884940757416, "epoch": 9.139941690962099, "grad_norm": 0.12352927029132843, "learning_rate": 1e-06, "loss": -0.0126, "step": 591 }, { "clip_ratio/high_max": 0.0024118531509884633, "clip_ratio/high_mean": 0.0010310264515283052, "clip_ratio/low_mean": 0.0008847234876157017, "clip_ratio/low_min": 3.9131849916884676e-05, "clip_ratio/region_mean": 0.001915749948238954, "epoch": 9.14927113702624, "grad_norm": 0.12940713763237, "learning_rate": 1e-06, "loss": -0.032, "step": 592 }, { "clip_ratio/high_max": 0.002132603374775499, "clip_ratio/high_mean": 0.0010033632170234341, "clip_ratio/low_mean": 0.0009103004704229534, "clip_ratio/low_min": 0.0001242578064193367, "clip_ratio/region_mean": 0.001913663676532451, "epoch": 9.15860058309038, "grad_norm": 0.11409001797437668, "learning_rate": 1e-06, "loss": -0.0087, "step": 593 }, { "clip_ratio/high_max": 0.00252408941014437, "clip_ratio/high_mean": 0.0010411773146188352, "clip_ratio/low_mean": 0.0008968510146587505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001938028355652932, "epoch": 9.167930029154519, "grad_norm": 0.11580094695091248, "learning_rate": 1e-06, "loss": -0.0194, "step": 594 }, { "clip_ratio/high_max": 0.002397456963080913, "clip_ratio/high_mean": 0.0010751511490525445, "clip_ratio/low_mean": 0.0009203229292324977, "clip_ratio/low_min": 3.9574621951032896e-05, "clip_ratio/region_mean": 0.0019954740128014237, "epoch": 9.177259475218658, "grad_norm": 0.12042679637670517, "learning_rate": 1e-06, "loss": -0.0321, "step": 595 }, { "clip_ratio/high_max": 0.0026028020729427226, "clip_ratio/high_mean": 0.0009402143041370437, "clip_ratio/low_mean": 0.001097114090953255, "clip_ratio/low_min": 4.034861194668338e-05, "clip_ratio/region_mean": 0.0020373283623484895, "epoch": 9.186588921282798, "grad_norm": 0.11380136013031006, "learning_rate": 1e-06, "loss": 0.0082, "step": 596 }, { "clip_ratio/high_max": 0.0025542153452988714, "clip_ratio/high_mean": 0.0010863021307159215, "clip_ratio/low_mean": 0.0010616341896820813, "clip_ratio/low_min": 2.504006442904938e-05, "clip_ratio/region_mean": 0.002147936334949918, "epoch": 9.19591836734694, "grad_norm": 0.13990618288516998, "learning_rate": 1e-06, "loss": -0.0257, "step": 597 }, { "clip_ratio/high_max": 0.0024183921777876094, "clip_ratio/high_mean": 0.0010112084855791181, "clip_ratio/low_mean": 0.0010370101445005275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002048218622803688, "epoch": 9.205247813411079, "grad_norm": 0.1352010816335678, "learning_rate": 1e-06, "loss": 0.0106, "step": 598 }, { "clip_ratio/high_max": 0.002286081580678001, "clip_ratio/high_mean": 0.00101827048274572, "clip_ratio/low_mean": 0.0010397060759714805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020579765550792217, "epoch": 9.214577259475218, "grad_norm": 0.12656739354133606, "learning_rate": 1e-06, "loss": -0.0157, "step": 599 }, { "clip_ratio/high_max": 0.002966120846394915, "clip_ratio/high_mean": 0.00115546835695568, "clip_ratio/low_mean": 0.0009242282285413239, "clip_ratio/low_min": 5.58867359359283e-05, "clip_ratio/region_mean": 0.0020796966055058874, "epoch": 9.223906705539358, "grad_norm": 0.13320180773735046, "learning_rate": 1e-06, "loss": -0.0322, "step": 600 }, { "clip_ratio/high_max": 0.002570431104686577, "clip_ratio/high_mean": 0.001044825570716057, "clip_ratio/low_mean": 0.001054975482475129, "clip_ratio/low_min": 4.5328949454415124e-05, "clip_ratio/region_mean": 0.0020998011023039, "epoch": 9.2332361516035, "grad_norm": 0.12351791560649872, "learning_rate": 1e-06, "loss": -0.0275, "step": 601 }, { "clip_ratio/high_max": 0.002675545278179925, "clip_ratio/high_mean": 0.0010400641695014201, "clip_ratio/low_mean": 0.0011567278088477906, "clip_ratio/low_min": 7.891396489867475e-05, "clip_ratio/region_mean": 0.002196792025642935, "epoch": 9.242565597667639, "grad_norm": 0.1305057853460312, "learning_rate": 1e-06, "loss": 0.0043, "step": 602 }, { "clip_ratio/high_max": 0.0023576595849590376, "clip_ratio/high_mean": 0.0009686773337307386, "clip_ratio/low_mean": 0.0011383059954823693, "clip_ratio/low_min": 0.00010036851017503068, "clip_ratio/region_mean": 0.0021069833310320973, "epoch": 9.251895043731778, "grad_norm": 0.12467783689498901, "learning_rate": 1e-06, "loss": 0.0067, "step": 603 }, { "clip_ratio/high_max": 0.002176429348764941, "clip_ratio/high_mean": 0.0010041748682851903, "clip_ratio/low_mean": 0.00103925911753322, "clip_ratio/low_min": 7.47635403968161e-05, "clip_ratio/region_mean": 0.002043434033112135, "epoch": 9.261224489795918, "grad_norm": 0.12222645431756973, "learning_rate": 1e-06, "loss": 0.0018, "step": 604 }, { "clip_ratio/high_max": 0.0023096971854101866, "clip_ratio/high_mean": 0.0011084528614446754, "clip_ratio/low_mean": 0.0008928074948926223, "clip_ratio/low_min": 5.180273365112953e-05, "clip_ratio/region_mean": 0.0020012603272334673, "epoch": 9.270553935860057, "grad_norm": 0.12752173840999603, "learning_rate": 1e-06, "loss": -0.0462, "step": 605 }, { "clip_ratio/high_max": 0.002885396752390079, "clip_ratio/high_mean": 0.001113216520025162, "clip_ratio/low_mean": 0.0010250577224724111, "clip_ratio/low_min": 5.2374300139490515e-05, "clip_ratio/region_mean": 0.0021382742197602056, "epoch": 9.279883381924199, "grad_norm": 0.1273031085729599, "learning_rate": 1e-06, "loss": -0.035, "step": 606 }, { "clip_ratio/high_max": 0.0024283317252411507, "clip_ratio/high_mean": 0.0011151989856443834, "clip_ratio/low_mean": 0.0010842994015547447, "clip_ratio/low_min": 4.774637272930704e-05, "clip_ratio/region_mean": 0.0021994983835611492, "epoch": 9.289212827988338, "grad_norm": 0.12082916498184204, "learning_rate": 1e-06, "loss": -0.0084, "step": 607 }, { "clip_ratio/high_max": 0.0022554691677214578, "clip_ratio/high_mean": 0.00090830139015452, "clip_ratio/low_mean": 0.0011348240113875363, "clip_ratio/low_min": 9.679474896984175e-05, "clip_ratio/region_mean": 0.0020431253578863107, "epoch": 9.298542274052478, "grad_norm": 0.11502255499362946, "learning_rate": 1e-06, "loss": 0.0456, "step": 608 }, { "clip_ratio/high_max": 0.002281930428580381, "clip_ratio/high_mean": 0.0010584541523712687, "clip_ratio/low_mean": 0.001140832609962672, "clip_ratio/low_min": 0.0001009505576803349, "clip_ratio/region_mean": 0.002199286813265644, "epoch": 9.307871720116617, "grad_norm": 0.13609622418880463, "learning_rate": 1e-06, "loss": -0.0035, "step": 609 }, { "clip_ratio/high_max": 0.0022955050953896716, "clip_ratio/high_mean": 0.0009464829636272043, "clip_ratio/low_mean": 0.0012318073968344834, "clip_ratio/low_min": 4.103742685401812e-05, "clip_ratio/region_mean": 0.0021782904223073274, "epoch": 9.317201166180759, "grad_norm": 0.11110639572143555, "learning_rate": 1e-06, "loss": 0.0354, "step": 610 }, { "clip_ratio/high_max": 0.0026898417563643306, "clip_ratio/high_mean": 0.0011072184024669696, "clip_ratio/low_mean": 0.0012886570548289455, "clip_ratio/low_min": 0.00011487503797980025, "clip_ratio/region_mean": 0.002395875475485809, "epoch": 9.326530612244898, "grad_norm": 0.12480517476797104, "learning_rate": 1e-06, "loss": -0.0055, "step": 611 }, { "clip_ratio/high_max": 0.002011195130762644, "clip_ratio/high_mean": 0.0010132121260539861, "clip_ratio/low_mean": 0.001125232516642427, "clip_ratio/low_min": 0.00010078738341690041, "clip_ratio/region_mean": 0.0021384446299634874, "epoch": 9.335860058309038, "grad_norm": 0.10422385483980179, "learning_rate": 1e-06, "loss": 0.0158, "step": 612 }, { "clip_ratio/high_max": 0.0025812284147832543, "clip_ratio/high_mean": 0.0011202832974959165, "clip_ratio/low_mean": 0.0011645415434031747, "clip_ratio/low_min": 0.00012398919170664158, "clip_ratio/region_mean": 0.0022848248700029217, "epoch": 9.345189504373177, "grad_norm": 0.12176833301782608, "learning_rate": 1e-06, "loss": -0.0198, "step": 613 }, { "clip_ratio/high_max": 0.002298432886163937, "clip_ratio/high_mean": 0.0010270744842273416, "clip_ratio/low_mean": 0.0011916998264496215, "clip_ratio/low_min": 1.1743705726985354e-05, "clip_ratio/region_mean": 0.002218774359789677, "epoch": 9.354518950437317, "grad_norm": 0.12416286766529083, "learning_rate": 1e-06, "loss": -0.0283, "step": 614 }, { "clip_ratio/high_max": 0.0024842171660566237, "clip_ratio/high_mean": 0.0010853604235308012, "clip_ratio/low_mean": 0.0011723396019078791, "clip_ratio/low_min": 8.367688496946357e-05, "clip_ratio/region_mean": 0.0022577000709134154, "epoch": 9.363848396501458, "grad_norm": 0.12261302024126053, "learning_rate": 1e-06, "loss": -0.02, "step": 615 }, { "clip_ratio/high_max": 0.002542728660046123, "clip_ratio/high_mean": 0.0010006554966821568, "clip_ratio/low_mean": 0.0012999935970583465, "clip_ratio/low_min": 0.00010261273564537987, "clip_ratio/region_mean": 0.00230064907373162, "epoch": 9.373177842565598, "grad_norm": 0.12616237998008728, "learning_rate": 1e-06, "loss": -0.0085, "step": 616 }, { "clip_ratio/high_max": 0.0025822238967521116, "clip_ratio/high_mean": 0.0011296734628558625, "clip_ratio/low_mean": 0.001227458327775821, "clip_ratio/low_min": 5.39282946192543e-05, "clip_ratio/region_mean": 0.0023571317942696624, "epoch": 9.382507288629737, "grad_norm": 0.1137118861079216, "learning_rate": 1e-06, "loss": 0.0039, "step": 617 }, { "clip_ratio/high_max": 0.0022787383568356745, "clip_ratio/high_mean": 0.0010230451371171512, "clip_ratio/low_mean": 0.0011172243212058675, "clip_ratio/low_min": 8.193383473553695e-05, "clip_ratio/region_mean": 0.0021402694765129127, "epoch": 9.391836734693877, "grad_norm": 0.1310858279466629, "learning_rate": 1e-06, "loss": -0.0114, "step": 618 }, { "clip_ratio/high_max": 0.0028333611408015713, "clip_ratio/high_mean": 0.0010583788407529937, "clip_ratio/low_mean": 0.0013321573205757886, "clip_ratio/low_min": 0.0001295913098147139, "clip_ratio/region_mean": 0.0023905362031655386, "epoch": 9.401166180758018, "grad_norm": 0.13438986241817474, "learning_rate": 1e-06, "loss": 0.0471, "step": 619 }, { "clip_ratio/high_max": 0.00254293757097912, "clip_ratio/high_mean": 0.0011987317921011709, "clip_ratio/low_mean": 0.0011739305682567647, "clip_ratio/low_min": 7.440601257258095e-05, "clip_ratio/region_mean": 0.0023726623912807554, "epoch": 9.410495626822158, "grad_norm": 0.1205848976969719, "learning_rate": 1e-06, "loss": 0.0017, "step": 620 }, { "clip_ratio/high_max": 0.0026614868984324858, "clip_ratio/high_mean": 0.0009962680505850585, "clip_ratio/low_mean": 0.0012723621366603766, "clip_ratio/low_min": 0.0002207628476753598, "clip_ratio/region_mean": 0.00226863023272017, "epoch": 9.419825072886297, "grad_norm": 0.11237507313489914, "learning_rate": 1e-06, "loss": 0.0184, "step": 621 }, { "clip_ratio/high_max": 0.0026837940749828704, "clip_ratio/high_mean": 0.0011231761309318244, "clip_ratio/low_mean": 0.0011201181878277566, "clip_ratio/low_min": 1.602975135028828e-05, "clip_ratio/region_mean": 0.0022432943151216023, "epoch": 9.429154518950437, "grad_norm": 0.11535617709159851, "learning_rate": 1e-06, "loss": 0.0047, "step": 622 }, { "clip_ratio/high_max": 0.002706679268158041, "clip_ratio/high_mean": 0.0011671201209537685, "clip_ratio/low_mean": 0.001195083423226606, "clip_ratio/low_min": 6.795987519581104e-05, "clip_ratio/region_mean": 0.0023622034932486713, "epoch": 9.438483965014576, "grad_norm": 0.18078528344631195, "learning_rate": 1e-06, "loss": -0.0147, "step": 623 }, { "clip_ratio/high_max": 0.0026416391556267627, "clip_ratio/high_mean": 0.0011515861806401517, "clip_ratio/low_mean": 0.0011183592196175596, "clip_ratio/low_min": 3.810975613305345e-05, "clip_ratio/region_mean": 0.002269945412990637, "epoch": 9.447813411078718, "grad_norm": 0.12711338698863983, "learning_rate": 1e-06, "loss": -0.0082, "step": 624 }, { "clip_ratio/high_max": 0.0026272208997397684, "clip_ratio/high_mean": 0.0011124699121864978, "clip_ratio/low_mean": 0.0011897631957253907, "clip_ratio/low_min": 1.7796128304325975e-05, "clip_ratio/region_mean": 0.002302233115187846, "epoch": 9.457142857142857, "grad_norm": 0.11111123859882355, "learning_rate": 1e-06, "loss": 0.0005, "step": 625 }, { "clip_ratio/high_max": 0.0028883457998745143, "clip_ratio/high_mean": 0.0011402333620935678, "clip_ratio/low_mean": 0.001043206502799876, "clip_ratio/low_min": 5.6443625908286776e-05, "clip_ratio/region_mean": 0.002183439864893444, "epoch": 9.466472303206997, "grad_norm": 0.12038857489824295, "learning_rate": 1e-06, "loss": -0.0258, "step": 626 }, { "clip_ratio/high_max": 0.002864792331820354, "clip_ratio/high_mean": 0.0011926564111490734, "clip_ratio/low_mean": 0.0011926289735129103, "clip_ratio/low_min": 0.0001204743512062123, "clip_ratio/region_mean": 0.0023852853701100685, "epoch": 9.475801749271136, "grad_norm": 0.14227232336997986, "learning_rate": 1e-06, "loss": 0.0279, "step": 627 }, { "clip_ratio/high_max": 0.002558366431912873, "clip_ratio/high_mean": 0.0010591216378088575, "clip_ratio/low_mean": 0.0011159516870975494, "clip_ratio/low_min": 9.450146899325773e-05, "clip_ratio/region_mean": 0.002175073343096301, "epoch": 9.485131195335278, "grad_norm": 0.11924881488084793, "learning_rate": 1e-06, "loss": 0.011, "step": 628 }, { "clip_ratio/high_max": 0.0024306028644787148, "clip_ratio/high_mean": 0.0010062136061605997, "clip_ratio/low_mean": 0.0014047953554836567, "clip_ratio/low_min": 0.00024120151101669762, "clip_ratio/region_mean": 0.002411008972558193, "epoch": 9.494460641399417, "grad_norm": 0.11524834483861923, "learning_rate": 1e-06, "loss": 0.0282, "step": 629 }, { "clip_ratio/high_max": 0.0027105009721708484, "clip_ratio/high_mean": 0.0012046547562931664, "clip_ratio/low_mean": 0.001090580968593713, "clip_ratio/low_min": 6.118566216173349e-05, "clip_ratio/region_mean": 0.002295235695783049, "epoch": 9.503790087463557, "grad_norm": 4.149825096130371, "learning_rate": 1e-06, "loss": -0.0233, "step": 630 }, { "clip_ratio/high_max": 0.0022502470965264365, "clip_ratio/high_mean": 0.001068422930984525, "clip_ratio/low_mean": 0.0013431821171252523, "clip_ratio/low_min": 0.0001410236109222751, "clip_ratio/region_mean": 0.0024116049899021164, "epoch": 9.513119533527696, "grad_norm": 0.12179077416658401, "learning_rate": 1e-06, "loss": 0.0296, "step": 631 }, { "clip_ratio/high_max": 0.002633572967170039, "clip_ratio/high_mean": 0.0012700480001512915, "clip_ratio/low_mean": 0.00114913525612792, "clip_ratio/low_min": 0.00012557354421005584, "clip_ratio/region_mean": 0.0024191832198994234, "epoch": 9.522448979591836, "grad_norm": 0.12621724605560303, "learning_rate": 1e-06, "loss": -0.0546, "step": 632 }, { "clip_ratio/high_max": 0.0033828141604317352, "clip_ratio/high_mean": 0.0013177760229154956, "clip_ratio/low_mean": 0.001145079781053937, "clip_ratio/low_min": 0.0001697611123745446, "clip_ratio/region_mean": 0.0024628558530821465, "epoch": 9.531778425655977, "grad_norm": 0.12737131118774414, "learning_rate": 1e-06, "loss": -0.0104, "step": 633 }, { "clip_ratio/high_max": 0.002435559654259123, "clip_ratio/high_mean": 0.0010674743780327844, "clip_ratio/low_mean": 0.0010726855052780593, "clip_ratio/low_min": 6.857414427940967e-05, "clip_ratio/region_mean": 0.0021401599078672007, "epoch": 9.541107871720117, "grad_norm": 0.11658084392547607, "learning_rate": 1e-06, "loss": 0.0054, "step": 634 }, { "clip_ratio/high_max": 0.0029381924541667104, "clip_ratio/high_mean": 0.0012834291337640025, "clip_ratio/low_mean": 0.0012484374419727828, "clip_ratio/low_min": 2.2325415557133965e-05, "clip_ratio/region_mean": 0.002531866564822849, "epoch": 9.550437317784256, "grad_norm": 0.12238677591085434, "learning_rate": 1e-06, "loss": -0.0065, "step": 635 }, { "clip_ratio/high_max": 0.0028432906219677534, "clip_ratio/high_mean": 0.0012824474415538134, "clip_ratio/low_mean": 0.001128096177126281, "clip_ratio/low_min": 7.687859124416718e-05, "clip_ratio/region_mean": 0.002410543638688978, "epoch": 9.559766763848396, "grad_norm": 0.11918799579143524, "learning_rate": 1e-06, "loss": -0.0446, "step": 636 }, { "clip_ratio/high_max": 0.002640255057485774, "clip_ratio/high_mean": 0.0011537502614373807, "clip_ratio/low_mean": 0.0015121560973057058, "clip_ratio/low_min": 7.015793380560353e-05, "clip_ratio/region_mean": 0.0026659063005354255, "epoch": 9.569096209912537, "grad_norm": 0.18935620784759521, "learning_rate": 1e-06, "loss": 0.018, "step": 637 }, { "clip_ratio/high_max": 0.0026502571563469246, "clip_ratio/high_mean": 0.0011769416414608713, "clip_ratio/low_mean": 0.0012465130421333015, "clip_ratio/low_min": 3.7534337025135756e-05, "clip_ratio/region_mean": 0.0024234546726802364, "epoch": 9.578425655976677, "grad_norm": 0.12795966863632202, "learning_rate": 1e-06, "loss": -0.0058, "step": 638 }, { "clip_ratio/high_max": 0.002897474951168988, "clip_ratio/high_mean": 0.0012982441949134227, "clip_ratio/low_mean": 0.001345323344139615, "clip_ratio/low_min": 0.0001741444539220538, "clip_ratio/region_mean": 0.0026435674881213345, "epoch": 9.587755102040816, "grad_norm": 0.12297380715608597, "learning_rate": 1e-06, "loss": -0.019, "step": 639 }, { "clip_ratio/high_max": 0.002759557122772094, "clip_ratio/high_mean": 0.001197715235321084, "clip_ratio/low_mean": 0.0012491223606048152, "clip_ratio/low_min": 4.050881034345366e-05, "clip_ratio/region_mean": 0.0024468376068398356, "epoch": 9.597084548104956, "grad_norm": 0.11560143530368805, "learning_rate": 1e-06, "loss": 0.0147, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036411830357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 689.0717163085938, "completions/mean_terminated_length": 560.3316040039062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 10.00932944606414, "grad_norm": 0.1392713338136673, "learning_rate": 1e-06, "loss": -0.032, "num_tokens": 408630472.0, "reward": 0.6119384765625, "reward_std": 0.180939719080925, "rewards/simpleverify_reward/mean": 0.6119384765625, "rewards/simpleverify_reward/std": 0.48731300234794617, "step": 641 }, { "clip_ratio/high_max": 0.002047834153927397, "clip_ratio/high_mean": 0.0008650950385344913, "clip_ratio/low_mean": 0.00047096276421143557, "clip_ratio/low_min": 5.592924571828917e-05, "clip_ratio/region_mean": 0.001336057797743706, "epoch": 10.018658892128279, "grad_norm": 0.13607341051101685, "learning_rate": 1e-06, "loss": -0.002, "step": 642 }, { "clip_ratio/high_max": 0.0022278865799307823, "clip_ratio/high_mean": 0.0009834486918407492, "clip_ratio/low_mean": 0.0004665450869651977, "clip_ratio/low_min": 9.999999747378752e-06, "clip_ratio/region_mean": 0.001449993760616053, "epoch": 10.02798833819242, "grad_norm": 0.14427782595157623, "learning_rate": 1e-06, "loss": -0.0355, "step": 643 }, { "clip_ratio/high_max": 0.00204466410286841, "clip_ratio/high_mean": 0.0008348910723725567, "clip_ratio/low_mean": 0.0006804180611652555, "clip_ratio/low_min": 1.4253135304898024e-05, "clip_ratio/region_mean": 0.001515309137175791, "epoch": 10.03731778425656, "grad_norm": 0.137557253241539, "learning_rate": 1e-06, "loss": 0.0036, "step": 644 }, { "clip_ratio/high_max": 0.0022067810787120834, "clip_ratio/high_mean": 0.0009102083386096638, "clip_ratio/low_mean": 0.0006301733064901782, "clip_ratio/low_min": 1.498441633884795e-05, "clip_ratio/region_mean": 0.0015403816432808526, "epoch": 10.0466472303207, "grad_norm": 0.12389273196458817, "learning_rate": 1e-06, "loss": -0.0166, "step": 645 }, { "clip_ratio/high_max": 0.002310533782292623, "clip_ratio/high_mean": 0.0009628772040741751, "clip_ratio/low_mean": 0.0007191718659669277, "clip_ratio/low_min": 0.0001007345927064307, "clip_ratio/region_mean": 0.0016820490891404916, "epoch": 10.055976676384839, "grad_norm": 0.122986800968647, "learning_rate": 1e-06, "loss": -0.0057, "step": 646 }, { "clip_ratio/high_max": 0.0022726235183654353, "clip_ratio/high_mean": 0.0009261680079362122, "clip_ratio/low_mean": 0.0007612035115016624, "clip_ratio/low_min": 8.911988334148191e-05, "clip_ratio/region_mean": 0.0016873714994289912, "epoch": 10.06530612244898, "grad_norm": 0.12592777609825134, "learning_rate": 1e-06, "loss": 0.004, "step": 647 }, { "clip_ratio/high_max": 0.002151245153072523, "clip_ratio/high_mean": 0.0009111481977015501, "clip_ratio/low_mean": 0.0009201193934131879, "clip_ratio/low_min": 9.33001347220852e-05, "clip_ratio/region_mean": 0.0018312676111236215, "epoch": 10.07463556851312, "grad_norm": 0.20106640458106995, "learning_rate": 1e-06, "loss": 0.0087, "step": 648 }, { "clip_ratio/high_max": 0.0021997812364134006, "clip_ratio/high_mean": 0.0009732377184263896, "clip_ratio/low_mean": 0.000970530974882422, "clip_ratio/low_min": 3.126488627458457e-05, "clip_ratio/region_mean": 0.0019437686787568964, "epoch": 10.08396501457726, "grad_norm": 0.11654148250818253, "learning_rate": 1e-06, "loss": 0.0082, "step": 649 }, { "clip_ratio/high_max": 0.002358433324843645, "clip_ratio/high_mean": 0.000947766635363223, "clip_ratio/low_mean": 0.0008258764564743615, "clip_ratio/low_min": 2.7778428375313524e-05, "clip_ratio/region_mean": 0.0017736431182129309, "epoch": 10.093294460641399, "grad_norm": 0.12819696962833405, "learning_rate": 1e-06, "loss": -0.001, "step": 650 }, { "clip_ratio/high_max": 0.0024035481619648635, "clip_ratio/high_mean": 0.0010478258845978417, "clip_ratio/low_mean": 0.0008721863960090559, "clip_ratio/low_min": 9.82086748990696e-06, "clip_ratio/region_mean": 0.001920012255141046, "epoch": 10.102623906705539, "grad_norm": 0.1123422384262085, "learning_rate": 1e-06, "loss": 0.0017, "step": 651 }, { "clip_ratio/high_max": 0.0023037685532472096, "clip_ratio/high_mean": 0.0010170627847401192, "clip_ratio/low_mean": 0.0008383774784306297, "clip_ratio/low_min": 3.5487936656863894e-05, "clip_ratio/region_mean": 0.001855440241342876, "epoch": 10.11195335276968, "grad_norm": 0.1292382776737213, "learning_rate": 1e-06, "loss": 0.0204, "step": 652 }, { "clip_ratio/high_max": 0.002392032190982718, "clip_ratio/high_mean": 0.0010367856884840876, "clip_ratio/low_mean": 0.0009508761158940615, "clip_ratio/low_min": 7.856137472117553e-05, "clip_ratio/region_mean": 0.0019876617734553292, "epoch": 10.12128279883382, "grad_norm": 0.1392720639705658, "learning_rate": 1e-06, "loss": -0.0116, "step": 653 }, { "clip_ratio/high_max": 0.0025919851104845293, "clip_ratio/high_mean": 0.001032770062010968, "clip_ratio/low_mean": 0.0008818659116514027, "clip_ratio/low_min": 1.6864543795236386e-05, "clip_ratio/region_mean": 0.0019146359481965192, "epoch": 10.130612244897959, "grad_norm": 0.16294026374816895, "learning_rate": 1e-06, "loss": -0.0109, "step": 654 }, { "clip_ratio/high_max": 0.0024329038860742003, "clip_ratio/high_mean": 0.0009516924801573623, "clip_ratio/low_mean": 0.000850626505780383, "clip_ratio/low_min": 6.167898027342744e-05, "clip_ratio/region_mean": 0.0018023189550149255, "epoch": 10.139941690962099, "grad_norm": 0.12228190153837204, "learning_rate": 1e-06, "loss": 0.0106, "step": 655 }, { "clip_ratio/high_max": 0.002595309360913234, "clip_ratio/high_mean": 0.0010521063704800326, "clip_ratio/low_mean": 0.0010832557891262695, "clip_ratio/low_min": 2.2494152290164493e-05, "clip_ratio/region_mean": 0.002135362185072154, "epoch": 10.14927113702624, "grad_norm": 0.1252080351114273, "learning_rate": 1e-06, "loss": 0.0118, "step": 656 }, { "clip_ratio/high_max": 0.0027463661135698203, "clip_ratio/high_mean": 0.0009227972986991517, "clip_ratio/low_mean": 0.0010185221362917218, "clip_ratio/low_min": 4.5262389903655276e-05, "clip_ratio/region_mean": 0.0019413193840591703, "epoch": 10.15860058309038, "grad_norm": 0.13458426296710968, "learning_rate": 1e-06, "loss": 0.0381, "step": 657 }, { "clip_ratio/high_max": 0.002248806515126489, "clip_ratio/high_mean": 0.001009656858514063, "clip_ratio/low_mean": 0.0012476445772335865, "clip_ratio/low_min": 5.814436281070812e-05, "clip_ratio/region_mean": 0.002257301428471692, "epoch": 10.167930029154519, "grad_norm": 0.12581020593643188, "learning_rate": 1e-06, "loss": 0.0137, "step": 658 }, { "clip_ratio/high_max": 0.0023237678869918454, "clip_ratio/high_mean": 0.0009907921230478678, "clip_ratio/low_mean": 0.0010463258113304619, "clip_ratio/low_min": 0.0001229994868481299, "clip_ratio/region_mean": 0.0020371178834466264, "epoch": 10.177259475218658, "grad_norm": 0.11078030616044998, "learning_rate": 1e-06, "loss": -0.0194, "step": 659 }, { "clip_ratio/high_max": 0.002240027672087308, "clip_ratio/high_mean": 0.0010262998184771277, "clip_ratio/low_mean": 0.0009901681496558012, "clip_ratio/low_min": 2.6652451197151095e-05, "clip_ratio/region_mean": 0.0020164679444860667, "epoch": 10.186588921282798, "grad_norm": 0.12092120945453644, "learning_rate": 1e-06, "loss": -0.0162, "step": 660 }, { "clip_ratio/high_max": 0.002720078598940745, "clip_ratio/high_mean": 0.0011530970223248005, "clip_ratio/low_mean": 0.0009657608370616799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002118857912137173, "epoch": 10.19591836734694, "grad_norm": 0.11478060483932495, "learning_rate": 1e-06, "loss": -0.0625, "step": 661 }, { "clip_ratio/high_max": 0.002589636256743688, "clip_ratio/high_mean": 0.0010263687290716916, "clip_ratio/low_mean": 0.0010298481556674233, "clip_ratio/low_min": 4.5873623093939386e-05, "clip_ratio/region_mean": 0.0020562169156619348, "epoch": 10.205247813411079, "grad_norm": 0.12798132002353668, "learning_rate": 1e-06, "loss": -0.0017, "step": 662 }, { "clip_ratio/high_max": 0.0025797197959036566, "clip_ratio/high_mean": 0.0010226304239040473, "clip_ratio/low_mean": 0.0010953042183245998, "clip_ratio/low_min": 8.636050006316509e-05, "clip_ratio/region_mean": 0.0021179346294957213, "epoch": 10.214577259475218, "grad_norm": 0.13217544555664062, "learning_rate": 1e-06, "loss": 0.0054, "step": 663 }, { "clip_ratio/high_max": 0.002498672816727776, "clip_ratio/high_mean": 0.0010616590225254185, "clip_ratio/low_mean": 0.0011730836195056327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002234742627479136, "epoch": 10.223906705539358, "grad_norm": 0.11584997922182083, "learning_rate": 1e-06, "loss": -0.0079, "step": 664 }, { "clip_ratio/high_max": 0.0022701531743223313, "clip_ratio/high_mean": 0.0009171811143460218, "clip_ratio/low_mean": 0.0010016705309681129, "clip_ratio/low_min": 4.604869445756776e-05, "clip_ratio/region_mean": 0.0019188516162103042, "epoch": 10.2332361516035, "grad_norm": 0.12133453041315079, "learning_rate": 1e-06, "loss": 0.0436, "step": 665 }, { "clip_ratio/high_max": 0.002220187958300812, "clip_ratio/high_mean": 0.0009100147872231901, "clip_ratio/low_mean": 0.001300268151680939, "clip_ratio/low_min": 2.7959726139670238e-05, "clip_ratio/region_mean": 0.002210282960732002, "epoch": 10.242565597667639, "grad_norm": 0.11900375783443451, "learning_rate": 1e-06, "loss": -0.0053, "step": 666 }, { "clip_ratio/high_max": 0.002403224028967088, "clip_ratio/high_mean": 0.0010102004207510618, "clip_ratio/low_mean": 0.0012406728637870401, "clip_ratio/low_min": 9.695904373074882e-05, "clip_ratio/region_mean": 0.002250873265438713, "epoch": 10.251895043731778, "grad_norm": 0.1301468163728714, "learning_rate": 1e-06, "loss": -0.0038, "step": 667 }, { "clip_ratio/high_max": 0.0027375788195058703, "clip_ratio/high_mean": 0.0009631810626160586, "clip_ratio/low_mean": 0.0011323421203996986, "clip_ratio/low_min": 5.215851251705317e-05, "clip_ratio/region_mean": 0.002095523159368895, "epoch": 10.261224489795918, "grad_norm": 0.11487710475921631, "learning_rate": 1e-06, "loss": 0.0007, "step": 668 }, { "clip_ratio/high_max": 0.00240037602634402, "clip_ratio/high_mean": 0.0011246110479987692, "clip_ratio/low_mean": 0.0010027175630966667, "clip_ratio/low_min": 1.436451384506654e-05, "clip_ratio/region_mean": 0.0021273286183713935, "epoch": 10.270553935860057, "grad_norm": 0.12883462011814117, "learning_rate": 1e-06, "loss": -0.0357, "step": 669 }, { "clip_ratio/high_max": 0.0024460777422063984, "clip_ratio/high_mean": 0.0009644778874644544, "clip_ratio/low_mean": 0.0010287969125784002, "clip_ratio/low_min": 6.640352057729615e-05, "clip_ratio/region_mean": 0.0019932748000428546, "epoch": 10.279883381924199, "grad_norm": 0.11545933037996292, "learning_rate": 1e-06, "loss": 0.0129, "step": 670 }, { "clip_ratio/high_max": 0.002622639702167362, "clip_ratio/high_mean": 0.0011205963382963091, "clip_ratio/low_mean": 0.0010524799108679872, "clip_ratio/low_min": 0.0001541666715638712, "clip_ratio/region_mean": 0.0021730762673541903, "epoch": 10.289212827988338, "grad_norm": 0.1184234619140625, "learning_rate": 1e-06, "loss": -0.0069, "step": 671 }, { "clip_ratio/high_max": 0.0026612194487825036, "clip_ratio/high_mean": 0.0011800450593000278, "clip_ratio/low_mean": 0.001131166485720314, "clip_ratio/low_min": 6.303721056610812e-05, "clip_ratio/region_mean": 0.002311211545020342, "epoch": 10.298542274052478, "grad_norm": 0.1345868855714798, "learning_rate": 1e-06, "loss": -0.0174, "step": 672 }, { "clip_ratio/high_max": 0.002358785131946206, "clip_ratio/high_mean": 0.0011492310877656564, "clip_ratio/low_mean": 0.001042951074850862, "clip_ratio/low_min": 0.00013271452189655975, "clip_ratio/region_mean": 0.002192182233557105, "epoch": 10.307871720116617, "grad_norm": 0.12077160179615021, "learning_rate": 1e-06, "loss": 0.0164, "step": 673 }, { "clip_ratio/high_max": 0.002329846676730085, "clip_ratio/high_mean": 0.0010293162886227947, "clip_ratio/low_mean": 0.000970665880231536, "clip_ratio/low_min": 2.9770025321340654e-05, "clip_ratio/region_mean": 0.0019999821670353413, "epoch": 10.317201166180759, "grad_norm": 0.11701883375644684, "learning_rate": 1e-06, "loss": 0.0141, "step": 674 }, { "clip_ratio/high_max": 0.0023262734903255478, "clip_ratio/high_mean": 0.0009797258080652682, "clip_ratio/low_mean": 0.0009553791642247234, "clip_ratio/low_min": 1.7269963791477494e-05, "clip_ratio/region_mean": 0.001935104992298875, "epoch": 10.326530612244898, "grad_norm": 0.12469476461410522, "learning_rate": 1e-06, "loss": 0.0126, "step": 675 }, { "clip_ratio/high_max": 0.002977002179250121, "clip_ratio/high_mean": 0.0011426664823375177, "clip_ratio/low_mean": 0.0009623113328416366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002104977822455112, "epoch": 10.335860058309038, "grad_norm": 0.11522013694047928, "learning_rate": 1e-06, "loss": -0.0093, "step": 676 }, { "clip_ratio/high_max": 0.0025358507846249267, "clip_ratio/high_mean": 0.0011223089568375144, "clip_ratio/low_mean": 0.0012691402262134943, "clip_ratio/low_min": 7.694023042859044e-05, "clip_ratio/region_mean": 0.0023914492048788816, "epoch": 10.345189504373177, "grad_norm": 0.1352730393409729, "learning_rate": 1e-06, "loss": 0.0167, "step": 677 }, { "clip_ratio/high_max": 0.002420452430669684, "clip_ratio/high_mean": 0.0011734755935322028, "clip_ratio/low_mean": 0.0009007682801893679, "clip_ratio/low_min": 3.0252333999669645e-05, "clip_ratio/region_mean": 0.0020742438355227932, "epoch": 10.354518950437317, "grad_norm": 0.12467822432518005, "learning_rate": 1e-06, "loss": -0.0279, "step": 678 }, { "clip_ratio/high_max": 0.002822324739099713, "clip_ratio/high_mean": 0.0010816074500326067, "clip_ratio/low_mean": 0.0009819369370234199, "clip_ratio/low_min": 5.467043229145929e-05, "clip_ratio/region_mean": 0.0020635444088838995, "epoch": 10.363848396501458, "grad_norm": 0.11344575136899948, "learning_rate": 1e-06, "loss": -0.0348, "step": 679 }, { "clip_ratio/high_max": 0.002812238846672699, "clip_ratio/high_mean": 0.001204537635203451, "clip_ratio/low_mean": 0.0010692879404814448, "clip_ratio/low_min": 4.6952697630331386e-05, "clip_ratio/region_mean": 0.002273825608426705, "epoch": 10.373177842565598, "grad_norm": 0.13063548505306244, "learning_rate": 1e-06, "loss": -0.0351, "step": 680 }, { "clip_ratio/high_max": 0.003112280297500547, "clip_ratio/high_mean": 0.0011077583130827406, "clip_ratio/low_mean": 0.001213093175465474, "clip_ratio/low_min": 2.037822014244739e-05, "clip_ratio/region_mean": 0.0023208514976431616, "epoch": 10.382507288629737, "grad_norm": 0.12051547318696976, "learning_rate": 1e-06, "loss": 0.0171, "step": 681 }, { "clip_ratio/high_max": 0.002610737799841445, "clip_ratio/high_mean": 0.0011157767839904409, "clip_ratio/low_mean": 0.0013045312698523048, "clip_ratio/low_min": 5.999003042234108e-05, "clip_ratio/region_mean": 0.0024203080392908305, "epoch": 10.391836734693877, "grad_norm": 0.13831959664821625, "learning_rate": 1e-06, "loss": 0.0116, "step": 682 }, { "clip_ratio/high_max": 0.0028272876006667502, "clip_ratio/high_mean": 0.0011715241562342271, "clip_ratio/low_mean": 0.0010931353972409852, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022646595316473395, "epoch": 10.401166180758018, "grad_norm": 0.1293380856513977, "learning_rate": 1e-06, "loss": -0.0352, "step": 683 }, { "clip_ratio/high_max": 0.00275197607697919, "clip_ratio/high_mean": 0.0011955971167481039, "clip_ratio/low_mean": 0.001124811311456142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023204084354802035, "epoch": 10.410495626822158, "grad_norm": 0.11363451182842255, "learning_rate": 1e-06, "loss": -0.0146, "step": 684 }, { "clip_ratio/high_max": 0.0025618452855269425, "clip_ratio/high_mean": 0.001123327234381577, "clip_ratio/low_mean": 0.001032562580803642, "clip_ratio/low_min": 1.3138532267475966e-05, "clip_ratio/region_mean": 0.0021558897860813886, "epoch": 10.419825072886297, "grad_norm": 0.11953888088464737, "learning_rate": 1e-06, "loss": -0.0254, "step": 685 }, { "clip_ratio/high_max": 0.00277746456413297, "clip_ratio/high_mean": 0.0010868894460145384, "clip_ratio/low_mean": 0.0009991054794227239, "clip_ratio/low_min": 7.77307304815622e-05, "clip_ratio/region_mean": 0.002085994950903114, "epoch": 10.429154518950437, "grad_norm": 0.12367735058069229, "learning_rate": 1e-06, "loss": 0.0017, "step": 686 }, { "clip_ratio/high_max": 0.0025198418443324044, "clip_ratio/high_mean": 0.0011185725234099664, "clip_ratio/low_mean": 0.0011533467932167696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022719192929798737, "epoch": 10.438483965014576, "grad_norm": 0.1284836083650589, "learning_rate": 1e-06, "loss": -0.0126, "step": 687 }, { "clip_ratio/high_max": 0.0029426096443785354, "clip_ratio/high_mean": 0.0013686332567885984, "clip_ratio/low_mean": 0.0010915840393863618, "clip_ratio/low_min": 1.524762137705693e-05, "clip_ratio/region_mean": 0.0024602173070888966, "epoch": 10.447813411078718, "grad_norm": 0.11925153434276581, "learning_rate": 1e-06, "loss": -0.0365, "step": 688 }, { "clip_ratio/high_max": 0.0026962149859173223, "clip_ratio/high_mean": 0.001117982745199697, "clip_ratio/low_mean": 0.001111999030399602, "clip_ratio/low_min": 5.7968771216110326e-05, "clip_ratio/region_mean": 0.0022299817901512142, "epoch": 10.457142857142857, "grad_norm": 0.12644325196743011, "learning_rate": 1e-06, "loss": 0.0007, "step": 689 }, { "clip_ratio/high_max": 0.00284798932261765, "clip_ratio/high_mean": 0.0011122077685286058, "clip_ratio/low_mean": 0.0013346712476050016, "clip_ratio/low_min": 0.0001674002287472831, "clip_ratio/region_mean": 0.002446878919727169, "epoch": 10.466472303206997, "grad_norm": 0.15829947590827942, "learning_rate": 1e-06, "loss": 0.0149, "step": 690 }, { "clip_ratio/high_max": 0.0028364363170112483, "clip_ratio/high_mean": 0.0011795817226811778, "clip_ratio/low_mean": 0.0011272262781858444, "clip_ratio/low_min": 5.4322379583027214e-05, "clip_ratio/region_mean": 0.0023068079390213825, "epoch": 10.475801749271136, "grad_norm": 0.12783053517341614, "learning_rate": 1e-06, "loss": -0.0039, "step": 691 }, { "clip_ratio/high_max": 0.002701826801057905, "clip_ratio/high_mean": 0.0011491954064695165, "clip_ratio/low_mean": 0.0010792308130476158, "clip_ratio/low_min": 6.33924973953981e-05, "clip_ratio/region_mean": 0.002228426186775323, "epoch": 10.485131195335278, "grad_norm": 0.13505765795707703, "learning_rate": 1e-06, "loss": 0.0011, "step": 692 }, { "clip_ratio/high_max": 0.002801874201395549, "clip_ratio/high_mean": 0.0011267380687058903, "clip_ratio/low_mean": 0.0012592140174092492, "clip_ratio/low_min": 6.472423046943732e-05, "clip_ratio/region_mean": 0.0023859520806581713, "epoch": 10.494460641399417, "grad_norm": 0.1198091208934784, "learning_rate": 1e-06, "loss": -0.006, "step": 693 }, { "clip_ratio/high_max": 0.00272663126088446, "clip_ratio/high_mean": 0.0011502458728500642, "clip_ratio/low_mean": 0.0011762642261601286, "clip_ratio/low_min": 5.099959162180312e-05, "clip_ratio/region_mean": 0.0023265101553988643, "epoch": 10.503790087463557, "grad_norm": 0.1183510273694992, "learning_rate": 1e-06, "loss": -0.0282, "step": 694 }, { "clip_ratio/high_max": 0.00226325225958135, "clip_ratio/high_mean": 0.0010219620453426614, "clip_ratio/low_mean": 0.0011328844866511645, "clip_ratio/low_min": 7.217723759822547e-05, "clip_ratio/region_mean": 0.0021548465665546246, "epoch": 10.513119533527696, "grad_norm": 0.11545974761247635, "learning_rate": 1e-06, "loss": 0.0347, "step": 695 }, { "clip_ratio/high_max": 0.0026324745631427504, "clip_ratio/high_mean": 0.0010099662504217122, "clip_ratio/low_mean": 0.0012412101968948264, "clip_ratio/low_min": 7.962570816744119e-05, "clip_ratio/region_mean": 0.002251176440040581, "epoch": 10.522448979591836, "grad_norm": 0.12273969501256943, "learning_rate": 1e-06, "loss": 0.007, "step": 696 }, { "clip_ratio/high_max": 0.002909318311139941, "clip_ratio/high_mean": 0.0013815920901834033, "clip_ratio/low_mean": 0.001027212639201025, "clip_ratio/low_min": 1.5206812349788379e-05, "clip_ratio/region_mean": 0.0024088047721306793, "epoch": 10.531778425655977, "grad_norm": 0.13470061123371124, "learning_rate": 1e-06, "loss": -0.0138, "step": 697 }, { "clip_ratio/high_max": 0.0028308058535913005, "clip_ratio/high_mean": 0.0011884839223057497, "clip_ratio/low_mean": 0.0012319489069341216, "clip_ratio/low_min": 4.3097268644487485e-05, "clip_ratio/region_mean": 0.002420432814687956, "epoch": 10.541107871720117, "grad_norm": 0.12553003430366516, "learning_rate": 1e-06, "loss": -0.0059, "step": 698 }, { "clip_ratio/high_max": 0.0024274217066704296, "clip_ratio/high_mean": 0.001128029794926988, "clip_ratio/low_mean": 0.001073542824087781, "clip_ratio/low_min": 7.914487468951847e-05, "clip_ratio/region_mean": 0.002201572591729928, "epoch": 10.550437317784256, "grad_norm": 0.13157522678375244, "learning_rate": 1e-06, "loss": -0.0077, "step": 699 }, { "clip_ratio/high_max": 0.0026281162499799393, "clip_ratio/high_mean": 0.001287640701775672, "clip_ratio/low_mean": 0.0010792171415232588, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023668578287470154, "epoch": 10.559766763848396, "grad_norm": 0.12107131630182266, "learning_rate": 1e-06, "loss": -0.0182, "step": 700 }, { "clip_ratio/high_max": 0.002921500949014444, "clip_ratio/high_mean": 0.0013067157597106416, "clip_ratio/low_mean": 0.0010316732441424392, "clip_ratio/low_min": 2.7345685339241754e-05, "clip_ratio/region_mean": 0.002338389022042975, "epoch": 10.569096209912537, "grad_norm": 0.11855398863554001, "learning_rate": 1e-06, "loss": -0.0406, "step": 701 }, { "clip_ratio/high_max": 0.0028459496825234964, "clip_ratio/high_mean": 0.0013375794660532847, "clip_ratio/low_mean": 0.0009570107904437464, "clip_ratio/low_min": 2.015478821704164e-05, "clip_ratio/region_mean": 0.0022945903037907556, "epoch": 10.578425655976677, "grad_norm": 0.11987543851137161, "learning_rate": 1e-06, "loss": -0.0312, "step": 702 }, { "clip_ratio/high_max": 0.0028598568751476705, "clip_ratio/high_mean": 0.0013222416673670523, "clip_ratio/low_mean": 0.0010981945051753428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024204362198361196, "epoch": 10.587755102040816, "grad_norm": 0.12368352711200714, "learning_rate": 1e-06, "loss": -0.0507, "step": 703 }, { "clip_ratio/high_max": 0.0027670677736750804, "clip_ratio/high_mean": 0.0010696100234781625, "clip_ratio/low_mean": 0.0010607376643747557, "clip_ratio/low_min": 1.981610694201663e-05, "clip_ratio/region_mean": 0.002130347696947865, "epoch": 10.597084548104956, "grad_norm": 0.13114728033542633, "learning_rate": 1e-06, "loss": 0.0038, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0411202566964286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 704.5460815429688, "completions/mean_terminated_length": 559.1080322265625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 11.00932944606414, "grad_norm": 0.12951792776584625, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 445078717.0, "reward": 0.6148332953453064, "reward_std": 0.181431844830513, "rewards/simpleverify_reward/mean": 0.6148332953453064, "rewards/simpleverify_reward/std": 0.4866389334201813, "step": 705 }, { "clip_ratio/high_max": 0.0024895566384657286, "clip_ratio/high_mean": 0.001000732179818442, "clip_ratio/low_mean": 0.0005967781635263236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015975103597156703, "epoch": 11.018658892128279, "grad_norm": 0.1346265971660614, "learning_rate": 1e-06, "loss": 0.0223, "step": 706 }, { "clip_ratio/high_max": 0.0019017110389540903, "clip_ratio/high_mean": 0.0008059542378759943, "clip_ratio/low_mean": 0.0006481111213361146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014540653573931195, "epoch": 11.02798833819242, "grad_norm": 0.12726321816444397, "learning_rate": 1e-06, "loss": -0.0029, "step": 707 }, { "clip_ratio/high_max": 0.001742305763400509, "clip_ratio/high_mean": 0.0007656710840819869, "clip_ratio/low_mean": 0.0006633641851294669, "clip_ratio/low_min": 2.565629347373033e-05, "clip_ratio/region_mean": 0.001429035281034885, "epoch": 11.03731778425656, "grad_norm": 0.12016823142766953, "learning_rate": 1e-06, "loss": 0.0018, "step": 708 }, { "clip_ratio/high_max": 0.0022843821134301834, "clip_ratio/high_mean": 0.0009043302670761477, "clip_ratio/low_mean": 0.0005947845074842917, "clip_ratio/low_min": 3.191217911080457e-05, "clip_ratio/region_mean": 0.00149911477274145, "epoch": 11.0466472303207, "grad_norm": 0.11984176933765411, "learning_rate": 1e-06, "loss": -0.0322, "step": 709 }, { "clip_ratio/high_max": 0.0019807598291663453, "clip_ratio/high_mean": 0.0008598407857789425, "clip_ratio/low_mean": 0.0007699524157942506, "clip_ratio/low_min": 4.4958812395634595e-05, "clip_ratio/region_mean": 0.0016297931688313838, "epoch": 11.055976676384839, "grad_norm": 0.1348883956670761, "learning_rate": 1e-06, "loss": -0.0167, "step": 710 }, { "clip_ratio/high_max": 0.0025368011883983854, "clip_ratio/high_mean": 0.0010764137769001536, "clip_ratio/low_mean": 0.0006921420699654846, "clip_ratio/low_min": 6.029195265000453e-05, "clip_ratio/region_mean": 0.0017685558341327123, "epoch": 11.06530612244898, "grad_norm": 0.12955395877361298, "learning_rate": 1e-06, "loss": -0.0284, "step": 711 }, { "clip_ratio/high_max": 0.0019479884358588606, "clip_ratio/high_mean": 0.0008761910139583051, "clip_ratio/low_mean": 0.0008445257117273286, "clip_ratio/low_min": 4.405466461321339e-05, "clip_ratio/region_mean": 0.0017207166674779728, "epoch": 11.07463556851312, "grad_norm": 0.131577730178833, "learning_rate": 1e-06, "loss": -0.01, "step": 712 }, { "clip_ratio/high_max": 0.0024894445232348517, "clip_ratio/high_mean": 0.0009285551295761252, "clip_ratio/low_mean": 0.0008968574893515324, "clip_ratio/low_min": 3.550074870872777e-05, "clip_ratio/region_mean": 0.0018254126116517, "epoch": 11.08396501457726, "grad_norm": 0.1403147280216217, "learning_rate": 1e-06, "loss": 0.0326, "step": 713 }, { "clip_ratio/high_max": 0.0021842913993168622, "clip_ratio/high_mean": 0.000965073184488574, "clip_ratio/low_mean": 0.0007783819610267528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017434551264159381, "epoch": 11.093294460641399, "grad_norm": 0.12377360463142395, "learning_rate": 1e-06, "loss": -0.0221, "step": 714 }, { "clip_ratio/high_max": 0.0026069017985719256, "clip_ratio/high_mean": 0.0010752888483693823, "clip_ratio/low_mean": 0.0007933147626317805, "clip_ratio/low_min": 4.6561715862480924e-05, "clip_ratio/region_mean": 0.001868603598268237, "epoch": 11.102623906705539, "grad_norm": 0.1331249624490738, "learning_rate": 1e-06, "loss": -0.0632, "step": 715 }, { "clip_ratio/high_max": 0.0023348920076387003, "clip_ratio/high_mean": 0.0009418703884875868, "clip_ratio/low_mean": 0.0009070927280845353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018489631547708996, "epoch": 11.11195335276968, "grad_norm": 0.12545324862003326, "learning_rate": 1e-06, "loss": 0.013, "step": 716 }, { "clip_ratio/high_max": 0.0022802559251431376, "clip_ratio/high_mean": 0.0009894158720271662, "clip_ratio/low_mean": 0.0008970978014986031, "clip_ratio/low_min": 1.7720441974233836e-05, "clip_ratio/region_mean": 0.0018865136444219388, "epoch": 11.12128279883382, "grad_norm": 0.14978772401809692, "learning_rate": 1e-06, "loss": -0.0008, "step": 717 }, { "clip_ratio/high_max": 0.0026689244841691107, "clip_ratio/high_mean": 0.0011391809239285067, "clip_ratio/low_mean": 0.0009504259451205144, "clip_ratio/low_min": 2.439982108626282e-05, "clip_ratio/region_mean": 0.0020896068745059893, "epoch": 11.130612244897959, "grad_norm": 0.14861105382442474, "learning_rate": 1e-06, "loss": 0.0112, "step": 718 }, { "clip_ratio/high_max": 0.00223810021270765, "clip_ratio/high_mean": 0.000930754715227522, "clip_ratio/low_mean": 0.001068526689778082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001999281368625816, "epoch": 11.139941690962099, "grad_norm": 0.12481657415628433, "learning_rate": 1e-06, "loss": -0.0018, "step": 719 }, { "clip_ratio/high_max": 0.002785318487440236, "clip_ratio/high_mean": 0.001166293030109955, "clip_ratio/low_mean": 0.0010012327038566582, "clip_ratio/low_min": 2.1742911485489458e-05, "clip_ratio/region_mean": 0.0021675257085007615, "epoch": 11.14927113702624, "grad_norm": 0.11951804906129837, "learning_rate": 1e-06, "loss": -0.0406, "step": 720 }, { "clip_ratio/high_max": 0.0023329493415076286, "clip_ratio/high_mean": 0.0009614210248400923, "clip_ratio/low_mean": 0.0009302967100666137, "clip_ratio/low_min": 7.445224946422968e-05, "clip_ratio/region_mean": 0.0018917177185358014, "epoch": 11.15860058309038, "grad_norm": 0.12068294733762741, "learning_rate": 1e-06, "loss": -0.0064, "step": 721 }, { "clip_ratio/high_max": 0.0021991022731526755, "clip_ratio/high_mean": 0.0009441447273275116, "clip_ratio/low_mean": 0.0009330413085990585, "clip_ratio/low_min": 3.205401571904076e-05, "clip_ratio/region_mean": 0.0018771860777633265, "epoch": 11.167930029154519, "grad_norm": 0.11660280078649521, "learning_rate": 1e-06, "loss": 0.0035, "step": 722 }, { "clip_ratio/high_max": 0.00244561888393946, "clip_ratio/high_mean": 0.0010596034298941959, "clip_ratio/low_mean": 0.0009064220203072182, "clip_ratio/low_min": 6.806187957408838e-05, "clip_ratio/region_mean": 0.0019660254183690995, "epoch": 11.177259475218658, "grad_norm": 0.11755045503377914, "learning_rate": 1e-06, "loss": -0.0003, "step": 723 }, { "clip_ratio/high_max": 0.002438738629280124, "clip_ratio/high_mean": 0.0010361317436036188, "clip_ratio/low_mean": 0.000995213542410056, "clip_ratio/low_min": 9.091229549085256e-05, "clip_ratio/region_mean": 0.0020313452841946855, "epoch": 11.186588921282798, "grad_norm": 0.12721142172813416, "learning_rate": 1e-06, "loss": 0.0133, "step": 724 }, { "clip_ratio/high_max": 0.002381140257057268, "clip_ratio/high_mean": 0.0009218243103532586, "clip_ratio/low_mean": 0.0008442512171313865, "clip_ratio/low_min": 3.742515036719851e-05, "clip_ratio/region_mean": 0.0017660755038377829, "epoch": 11.19591836734694, "grad_norm": 0.12327289581298828, "learning_rate": 1e-06, "loss": 0.0034, "step": 725 }, { "clip_ratio/high_max": 0.002510574799089227, "clip_ratio/high_mean": 0.001081999600501149, "clip_ratio/low_mean": 0.000987239349342417, "clip_ratio/low_min": 8.373669697903097e-05, "clip_ratio/region_mean": 0.002069238958938513, "epoch": 11.205247813411079, "grad_norm": 0.13151709735393524, "learning_rate": 1e-06, "loss": 0.0173, "step": 726 }, { "clip_ratio/high_max": 0.0023962165432749316, "clip_ratio/high_mean": 0.001015934754832415, "clip_ratio/low_mean": 0.0011323489979986334, "clip_ratio/low_min": 3.128910975647159e-05, "clip_ratio/region_mean": 0.0021482837400981225, "epoch": 11.214577259475218, "grad_norm": 0.11238902062177658, "learning_rate": 1e-06, "loss": -0.0005, "step": 727 }, { "clip_ratio/high_max": 0.0026177162799285725, "clip_ratio/high_mean": 0.0009803035973163787, "clip_ratio/low_mean": 0.0009010215417220024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018813251663232222, "epoch": 11.223906705539358, "grad_norm": 0.11881382763385773, "learning_rate": 1e-06, "loss": -0.0198, "step": 728 }, { "clip_ratio/high_max": 0.0021843644572072662, "clip_ratio/high_mean": 0.0010725448009907268, "clip_ratio/low_mean": 0.0010054527301690541, "clip_ratio/low_min": 2.4708440832910128e-05, "clip_ratio/region_mean": 0.002077997567539569, "epoch": 11.2332361516035, "grad_norm": 0.17188583314418793, "learning_rate": 1e-06, "loss": 0.0185, "step": 729 }, { "clip_ratio/high_max": 0.002614750243083108, "clip_ratio/high_mean": 0.0012701251234830124, "clip_ratio/low_mean": 0.0010940887150354683, "clip_ratio/low_min": 6.12630810792325e-05, "clip_ratio/region_mean": 0.0023642137603019364, "epoch": 11.242565597667639, "grad_norm": 0.12362480908632278, "learning_rate": 1e-06, "loss": -0.0523, "step": 730 }, { "clip_ratio/high_max": 0.0028457361986511387, "clip_ratio/high_mean": 0.0011353273948770948, "clip_ratio/low_mean": 0.0010526663954806281, "clip_ratio/low_min": 0.00013636371477332432, "clip_ratio/region_mean": 0.002187993814004585, "epoch": 11.251895043731778, "grad_norm": 0.12405002117156982, "learning_rate": 1e-06, "loss": 0.0146, "step": 731 }, { "clip_ratio/high_max": 0.00239518877788214, "clip_ratio/high_mean": 0.0009262924450013088, "clip_ratio/low_mean": 0.0010145465892037464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019408389998716302, "epoch": 11.261224489795918, "grad_norm": 0.12108957022428513, "learning_rate": 1e-06, "loss": 0.0316, "step": 732 }, { "clip_ratio/high_max": 0.002161001590138767, "clip_ratio/high_mean": 0.0009232370721292682, "clip_ratio/low_mean": 0.0011434762782300822, "clip_ratio/low_min": 9.173826310870936e-05, "clip_ratio/region_mean": 0.0020667133649112657, "epoch": 11.270553935860057, "grad_norm": 0.13761521875858307, "learning_rate": 1e-06, "loss": 0.0257, "step": 733 }, { "clip_ratio/high_max": 0.002515211104764603, "clip_ratio/high_mean": 0.0009791414495339268, "clip_ratio/low_mean": 0.0011099152488895925, "clip_ratio/low_min": 5.0089560318156146e-05, "clip_ratio/region_mean": 0.002089056739350781, "epoch": 11.279883381924199, "grad_norm": 0.1308261752128601, "learning_rate": 1e-06, "loss": 0.0514, "step": 734 }, { "clip_ratio/high_max": 0.002347918962186668, "clip_ratio/high_mean": 0.0009449677945667645, "clip_ratio/low_mean": 0.0009651322561694542, "clip_ratio/low_min": 7.460100096068345e-05, "clip_ratio/region_mean": 0.0019101000289083458, "epoch": 11.289212827988338, "grad_norm": 0.11759794503450394, "learning_rate": 1e-06, "loss": 0.0155, "step": 735 }, { "clip_ratio/high_max": 0.002399704262643354, "clip_ratio/high_mean": 0.0009696418019302655, "clip_ratio/low_mean": 0.0010621196779538877, "clip_ratio/low_min": 5.5973654525587335e-05, "clip_ratio/region_mean": 0.002031761468970217, "epoch": 11.298542274052478, "grad_norm": 0.12267669290304184, "learning_rate": 1e-06, "loss": 0.0107, "step": 736 }, { "clip_ratio/high_max": 0.0023604551824973896, "clip_ratio/high_mean": 0.0011393387085263385, "clip_ratio/low_mean": 0.0009235406751031405, "clip_ratio/low_min": 2.9429937967506703e-05, "clip_ratio/region_mean": 0.0020628794154617935, "epoch": 11.307871720116617, "grad_norm": 0.11160534620285034, "learning_rate": 1e-06, "loss": -0.0345, "step": 737 }, { "clip_ratio/high_max": 0.002713168109039543, "clip_ratio/high_mean": 0.0010418410347483587, "clip_ratio/low_mean": 0.0011020058441317815, "clip_ratio/low_min": 0.00011018855548172724, "clip_ratio/region_mean": 0.0021438468756969087, "epoch": 11.317201166180759, "grad_norm": 0.12543310225009918, "learning_rate": 1e-06, "loss": 0.0164, "step": 738 }, { "clip_ratio/high_max": 0.0028527663671411574, "clip_ratio/high_mean": 0.0011213679899810813, "clip_ratio/low_mean": 0.0009325786959379911, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020539467004709877, "epoch": 11.326530612244898, "grad_norm": 0.15083754062652588, "learning_rate": 1e-06, "loss": -0.0146, "step": 739 }, { "clip_ratio/high_max": 0.0030303685562103055, "clip_ratio/high_mean": 0.0011599207973631565, "clip_ratio/low_mean": 0.0013503695226972923, "clip_ratio/low_min": 6.762324028386502e-05, "clip_ratio/region_mean": 0.002510290330974385, "epoch": 11.335860058309038, "grad_norm": 0.24084800481796265, "learning_rate": 1e-06, "loss": -0.0045, "step": 740 }, { "clip_ratio/high_max": 0.0020095816762477625, "clip_ratio/high_mean": 0.0009474514572502812, "clip_ratio/low_mean": 0.0012158709614595864, "clip_ratio/low_min": 2.384585968684405e-05, "clip_ratio/region_mean": 0.0021633224096149206, "epoch": 11.345189504373177, "grad_norm": 0.1264019012451172, "learning_rate": 1e-06, "loss": 0.054, "step": 741 }, { "clip_ratio/high_max": 0.0025180551238008775, "clip_ratio/high_mean": 0.001131332595832646, "clip_ratio/low_mean": 0.0009596096006134758, "clip_ratio/low_min": 3.4610492548381444e-05, "clip_ratio/region_mean": 0.0020909421727992594, "epoch": 11.354518950437317, "grad_norm": 0.12203217297792435, "learning_rate": 1e-06, "loss": -0.0248, "step": 742 }, { "clip_ratio/high_max": 0.0030798617590335198, "clip_ratio/high_mean": 0.0013094159221509472, "clip_ratio/low_mean": 0.000991718527075136, "clip_ratio/low_min": 3.9871658373158425e-05, "clip_ratio/region_mean": 0.0023011344601400197, "epoch": 11.363848396501458, "grad_norm": 0.13002605736255646, "learning_rate": 1e-06, "loss": -0.018, "step": 743 }, { "clip_ratio/high_max": 0.0027146751308464445, "clip_ratio/high_mean": 0.0012330370445852168, "clip_ratio/low_mean": 0.0009880520810838789, "clip_ratio/low_min": 0.00010868485333048739, "clip_ratio/region_mean": 0.002221089096565265, "epoch": 11.373177842565598, "grad_norm": 0.13661731779575348, "learning_rate": 1e-06, "loss": -0.0582, "step": 744 }, { "clip_ratio/high_max": 0.002614112469018437, "clip_ratio/high_mean": 0.0012876811924797948, "clip_ratio/low_mean": 0.0010484185404493473, "clip_ratio/low_min": 8.594152677687816e-05, "clip_ratio/region_mean": 0.0023360997074632905, "epoch": 11.382507288629737, "grad_norm": 0.13901357352733612, "learning_rate": 1e-06, "loss": -0.0137, "step": 745 }, { "clip_ratio/high_max": 0.002895220648497343, "clip_ratio/high_mean": 0.0012374354955682065, "clip_ratio/low_mean": 0.0009863231771305436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002223758645413909, "epoch": 11.391836734693877, "grad_norm": 0.12362924218177795, "learning_rate": 1e-06, "loss": -0.0252, "step": 746 }, { "clip_ratio/high_max": 0.0026101976764039136, "clip_ratio/high_mean": 0.001134576006734278, "clip_ratio/low_mean": 0.0011314359962852905, "clip_ratio/low_min": 5.206631522014504e-05, "clip_ratio/region_mean": 0.0022660120666841976, "epoch": 11.401166180758018, "grad_norm": 0.12327460199594498, "learning_rate": 1e-06, "loss": -0.0087, "step": 747 }, { "clip_ratio/high_max": 0.002580092885182239, "clip_ratio/high_mean": 0.001262037254491588, "clip_ratio/low_mean": 0.0010065585374832153, "clip_ratio/low_min": 0.00011536663078004494, "clip_ratio/region_mean": 0.00226859580288874, "epoch": 11.410495626822158, "grad_norm": 0.12078581750392914, "learning_rate": 1e-06, "loss": -0.0297, "step": 748 }, { "clip_ratio/high_max": 0.003057281966903247, "clip_ratio/high_mean": 0.0012085692305845441, "clip_ratio/low_mean": 0.0011419986767577939, "clip_ratio/low_min": 6.670238690276165e-05, "clip_ratio/region_mean": 0.002350567912799306, "epoch": 11.419825072886297, "grad_norm": 0.12613429129123688, "learning_rate": 1e-06, "loss": -0.0415, "step": 749 }, { "clip_ratio/high_max": 0.00254219192720484, "clip_ratio/high_mean": 0.0011747742610168643, "clip_ratio/low_mean": 0.001075635615052306, "clip_ratio/low_min": 1.7841850421973504e-05, "clip_ratio/region_mean": 0.0022504099033540115, "epoch": 11.429154518950437, "grad_norm": 0.12662017345428467, "learning_rate": 1e-06, "loss": 0.0049, "step": 750 }, { "clip_ratio/high_max": 0.002502478026144672, "clip_ratio/high_mean": 0.0010446332362334942, "clip_ratio/low_mean": 0.0012332304595474852, "clip_ratio/low_min": 8.38929736346472e-05, "clip_ratio/region_mean": 0.0022778636775910854, "epoch": 11.438483965014576, "grad_norm": 0.13240495324134827, "learning_rate": 1e-06, "loss": -0.0294, "step": 751 }, { "clip_ratio/high_max": 0.0027811097825178877, "clip_ratio/high_mean": 0.001113544170948444, "clip_ratio/low_mean": 0.0011844903619930847, "clip_ratio/low_min": 6.30276099400362e-05, "clip_ratio/region_mean": 0.0022980345675023273, "epoch": 11.447813411078718, "grad_norm": 0.1276206225156784, "learning_rate": 1e-06, "loss": -0.0008, "step": 752 }, { "clip_ratio/high_max": 0.0030890490452293307, "clip_ratio/high_mean": 0.0011506654882396106, "clip_ratio/low_mean": 0.0012208038824610412, "clip_ratio/low_min": 4.82346149510704e-05, "clip_ratio/region_mean": 0.0023714693379588425, "epoch": 11.457142857142857, "grad_norm": 0.13413143157958984, "learning_rate": 1e-06, "loss": -0.0112, "step": 753 }, { "clip_ratio/high_max": 0.0031963121437001973, "clip_ratio/high_mean": 0.0013220494729466736, "clip_ratio/low_mean": 0.0012697162783297244, "clip_ratio/low_min": 3.198566992068663e-05, "clip_ratio/region_mean": 0.002591765791294165, "epoch": 11.466472303206997, "grad_norm": 0.13057121634483337, "learning_rate": 1e-06, "loss": -0.0176, "step": 754 }, { "clip_ratio/high_max": 0.00294013433449436, "clip_ratio/high_mean": 0.0012466764637792949, "clip_ratio/low_mean": 0.0014524683028867003, "clip_ratio/low_min": 0.00019288149997009896, "clip_ratio/region_mean": 0.002699144803045783, "epoch": 11.475801749271136, "grad_norm": 0.11764442175626755, "learning_rate": 1e-06, "loss": -0.0065, "step": 755 }, { "clip_ratio/high_max": 0.002724162877711933, "clip_ratio/high_mean": 0.0011036147552658804, "clip_ratio/low_mean": 0.0011416962333896663, "clip_ratio/low_min": 6.9522475314443e-05, "clip_ratio/region_mean": 0.00224531094863778, "epoch": 11.485131195335278, "grad_norm": 0.12831537425518036, "learning_rate": 1e-06, "loss": 0.0018, "step": 756 }, { "clip_ratio/high_max": 0.002910467497713398, "clip_ratio/high_mean": 0.0012290866106923204, "clip_ratio/low_mean": 0.0012091222270100843, "clip_ratio/low_min": 5.258953842712799e-05, "clip_ratio/region_mean": 0.002438208866806235, "epoch": 11.494460641399417, "grad_norm": 0.1343003511428833, "learning_rate": 1e-06, "loss": -0.0358, "step": 757 }, { "clip_ratio/high_max": 0.0025345139220007695, "clip_ratio/high_mean": 0.0011368698706064606, "clip_ratio/low_mean": 0.0011714310985553311, "clip_ratio/low_min": 4.6779983676970005e-05, "clip_ratio/region_mean": 0.002308300980075728, "epoch": 11.503790087463557, "grad_norm": 0.12139391899108887, "learning_rate": 1e-06, "loss": 0.0057, "step": 758 }, { "clip_ratio/high_max": 0.002611616342619527, "clip_ratio/high_mean": 0.0011538451690285, "clip_ratio/low_mean": 0.0011436322620284045, "clip_ratio/low_min": 0.00012903800961794332, "clip_ratio/region_mean": 0.002297477491083555, "epoch": 11.513119533527696, "grad_norm": 0.1262354552745819, "learning_rate": 1e-06, "loss": -0.0227, "step": 759 }, { "clip_ratio/high_max": 0.002452380213071592, "clip_ratio/high_mean": 0.0010706639877753332, "clip_ratio/low_mean": 0.001350758289845544, "clip_ratio/low_min": 0.00016404510461143218, "clip_ratio/region_mean": 0.0024214223230956122, "epoch": 11.522448979591836, "grad_norm": 0.12216098606586456, "learning_rate": 1e-06, "loss": 0.0463, "step": 760 }, { "clip_ratio/high_max": 0.0032536294929741416, "clip_ratio/high_mean": 0.0014053815648367163, "clip_ratio/low_mean": 0.0011871640526806004, "clip_ratio/low_min": 8.931851607485441e-05, "clip_ratio/region_mean": 0.002592545555671677, "epoch": 11.531778425655977, "grad_norm": 0.11559303849935532, "learning_rate": 1e-06, "loss": -0.0374, "step": 761 }, { "clip_ratio/high_max": 0.0032493172475369647, "clip_ratio/high_mean": 0.0013286821958899964, "clip_ratio/low_mean": 0.0008384985521843191, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021671807262464426, "epoch": 11.541107871720117, "grad_norm": 0.11857152730226517, "learning_rate": 1e-06, "loss": -0.0626, "step": 762 }, { "clip_ratio/high_max": 0.0028308008913882077, "clip_ratio/high_mean": 0.0011588010929699522, "clip_ratio/low_mean": 0.0011810539381258423, "clip_ratio/low_min": 7.284887396963313e-05, "clip_ratio/region_mean": 0.002339855011086911, "epoch": 11.550437317784256, "grad_norm": 0.11263173073530197, "learning_rate": 1e-06, "loss": -0.0117, "step": 763 }, { "clip_ratio/high_max": 0.00292706972322776, "clip_ratio/high_mean": 0.0011468915627119713, "clip_ratio/low_mean": 0.0013457503573590657, "clip_ratio/low_min": 1.806358341127634e-05, "clip_ratio/region_mean": 0.002492641913704574, "epoch": 11.559766763848396, "grad_norm": 0.1806187480688095, "learning_rate": 1e-06, "loss": -0.0201, "step": 764 }, { "clip_ratio/high_max": 0.002420545235509053, "clip_ratio/high_mean": 0.0010321046538592782, "clip_ratio/low_mean": 0.0014250486055971123, "clip_ratio/low_min": 0.00011616915980994236, "clip_ratio/region_mean": 0.002457153248542454, "epoch": 11.569096209912537, "grad_norm": 0.11586566269397736, "learning_rate": 1e-06, "loss": -0.0038, "step": 765 }, { "clip_ratio/high_max": 0.002730551153945271, "clip_ratio/high_mean": 0.0011794552083301824, "clip_ratio/low_mean": 0.0012456532458600122, "clip_ratio/low_min": 8.259169226221275e-05, "clip_ratio/region_mean": 0.0024251084178104065, "epoch": 11.578425655976677, "grad_norm": 0.1318320780992508, "learning_rate": 1e-06, "loss": -0.0186, "step": 766 }, { "clip_ratio/high_max": 0.003323022261611186, "clip_ratio/high_mean": 0.0013961967397335684, "clip_ratio/low_mean": 0.0010770241315185558, "clip_ratio/low_min": 0.00016406031681981403, "clip_ratio/region_mean": 0.002473220869433135, "epoch": 11.587755102040816, "grad_norm": 0.12317539751529694, "learning_rate": 1e-06, "loss": -0.0453, "step": 767 }, { "clip_ratio/high_max": 0.002865194182959385, "clip_ratio/high_mean": 0.001273908117582323, "clip_ratio/low_mean": 0.0011619734432315454, "clip_ratio/low_min": 4.2286872485419735e-05, "clip_ratio/region_mean": 0.002435881513520144, "epoch": 11.597084548104956, "grad_norm": 0.13387911021709442, "learning_rate": 1e-06, "loss": -0.0092, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.042881556919642905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 711.5376586914062, "completions/mean_terminated_length": 559.904296875, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 12.00932944606414, "grad_norm": 0.12344630062580109, "learning_rate": 1e-06, "loss": 0.0313, "num_tokens": 481489698.0, "reward": 0.6261858344078064, "reward_std": 0.1730087548494339, "rewards/simpleverify_reward/mean": 0.6261858344078064, "rewards/simpleverify_reward/std": 0.48381945490837097, "step": 769 }, { "clip_ratio/high_max": 0.001982039520953549, "clip_ratio/high_mean": 0.000854600011734874, "clip_ratio/low_mean": 0.0005202167030802229, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013748167039011605, "epoch": 12.018658892128279, "grad_norm": 0.12510351836681366, "learning_rate": 1e-06, "loss": -0.0493, "step": 770 }, { "clip_ratio/high_max": 0.0018740668492682744, "clip_ratio/high_mean": 0.0007808510454196949, "clip_ratio/low_mean": 0.0004294324535294436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012102835316909477, "epoch": 12.02798833819242, "grad_norm": 0.1354844719171524, "learning_rate": 1e-06, "loss": -0.0253, "step": 771 }, { "clip_ratio/high_max": 0.0019721556527656503, "clip_ratio/high_mean": 0.0007854262312321225, "clip_ratio/low_mean": 0.0006107273266025004, "clip_ratio/low_min": 2.8629018743231427e-05, "clip_ratio/region_mean": 0.0013961535551061388, "epoch": 12.03731778425656, "grad_norm": 0.12820011377334595, "learning_rate": 1e-06, "loss": 0.0082, "step": 772 }, { "clip_ratio/high_max": 0.001995891892875079, "clip_ratio/high_mean": 0.0008941908308770508, "clip_ratio/low_mean": 0.0005449218706417014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014391127042472363, "epoch": 12.0466472303207, "grad_norm": 0.14113283157348633, "learning_rate": 1e-06, "loss": -0.0236, "step": 773 }, { "clip_ratio/high_max": 0.00212800425288151, "clip_ratio/high_mean": 0.0008312418367495411, "clip_ratio/low_mean": 0.0005162234765521134, "clip_ratio/low_min": 1.27577059174655e-05, "clip_ratio/region_mean": 0.0013474652987497393, "epoch": 12.055976676384839, "grad_norm": 0.12578293681144714, "learning_rate": 1e-06, "loss": -0.0031, "step": 774 }, { "clip_ratio/high_max": 0.00207770339329727, "clip_ratio/high_mean": 0.0008650214276713086, "clip_ratio/low_mean": 0.0008076959747995716, "clip_ratio/low_min": 1.3898154065827839e-05, "clip_ratio/region_mean": 0.0016727174224797636, "epoch": 12.06530612244898, "grad_norm": 0.13031761348247528, "learning_rate": 1e-06, "loss": 0.0144, "step": 775 }, { "clip_ratio/high_max": 0.002384588609857019, "clip_ratio/high_mean": 0.0010925433452939615, "clip_ratio/low_mean": 0.0008189423151634401, "clip_ratio/low_min": 4.086302578798495e-05, "clip_ratio/region_mean": 0.0019114856913802214, "epoch": 12.07463556851312, "grad_norm": 0.1263548582792282, "learning_rate": 1e-06, "loss": -0.0248, "step": 776 }, { "clip_ratio/high_max": 0.002328166832739953, "clip_ratio/high_mean": 0.0009084943048947025, "clip_ratio/low_mean": 0.0007481899956474081, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001656684293266153, "epoch": 12.08396501457726, "grad_norm": 0.12414831668138504, "learning_rate": 1e-06, "loss": -0.0062, "step": 777 }, { "clip_ratio/high_max": 0.00216972162888851, "clip_ratio/high_mean": 0.0009422271541552618, "clip_ratio/low_mean": 0.0008486050264764344, "clip_ratio/low_min": 4.4227801481611095e-05, "clip_ratio/region_mean": 0.0017908321678987704, "epoch": 12.093294460641399, "grad_norm": 0.1291738897562027, "learning_rate": 1e-06, "loss": -0.0171, "step": 778 }, { "clip_ratio/high_max": 0.002544457332987804, "clip_ratio/high_mean": 0.0011278465208306443, "clip_ratio/low_mean": 0.0008483778992740554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001976224419195205, "epoch": 12.102623906705539, "grad_norm": 0.12321454286575317, "learning_rate": 1e-06, "loss": -0.0424, "step": 779 }, { "clip_ratio/high_max": 0.0022426467839977704, "clip_ratio/high_mean": 0.0009335520517197438, "clip_ratio/low_mean": 0.0007896645665823598, "clip_ratio/low_min": 4.5110986320651136e-05, "clip_ratio/region_mean": 0.0017232166501344182, "epoch": 12.11195335276968, "grad_norm": 0.11238522082567215, "learning_rate": 1e-06, "loss": -0.0324, "step": 780 }, { "clip_ratio/high_max": 0.0020828996202908456, "clip_ratio/high_mean": 0.0009151240883511491, "clip_ratio/low_mean": 0.0008842037786962464, "clip_ratio/low_min": 2.4680372916918714e-05, "clip_ratio/region_mean": 0.0017993278815993108, "epoch": 12.12128279883382, "grad_norm": 0.13164466619491577, "learning_rate": 1e-06, "loss": -0.0101, "step": 781 }, { "clip_ratio/high_max": 0.0017927900298673194, "clip_ratio/high_mean": 0.0008759616248426028, "clip_ratio/low_mean": 0.0010220490330539178, "clip_ratio/low_min": 5.633168257190846e-05, "clip_ratio/region_mean": 0.001898010632430669, "epoch": 12.130612244897959, "grad_norm": 0.13335801661014557, "learning_rate": 1e-06, "loss": -0.0222, "step": 782 }, { "clip_ratio/high_max": 0.0024858457618393004, "clip_ratio/high_mean": 0.0009664298386269365, "clip_ratio/low_mean": 0.0010804744051711168, "clip_ratio/low_min": 6.860592839075252e-05, "clip_ratio/region_mean": 0.002046904221060686, "epoch": 12.139941690962099, "grad_norm": 0.12098106741905212, "learning_rate": 1e-06, "loss": -0.0088, "step": 783 }, { "clip_ratio/high_max": 0.002409208995231893, "clip_ratio/high_mean": 0.0010208608000539243, "clip_ratio/low_mean": 0.0008848585193845793, "clip_ratio/low_min": 0.00013395625683187973, "clip_ratio/region_mean": 0.001905719327623956, "epoch": 12.14927113702624, "grad_norm": 0.12097387760877609, "learning_rate": 1e-06, "loss": -0.0151, "step": 784 }, { "clip_ratio/high_max": 0.0022664294665446505, "clip_ratio/high_mean": 0.0010413722302473616, "clip_ratio/low_mean": 0.0008924802932597231, "clip_ratio/low_min": 4.85528735225671e-05, "clip_ratio/region_mean": 0.0019338524944032542, "epoch": 12.15860058309038, "grad_norm": 0.11832785606384277, "learning_rate": 1e-06, "loss": -0.0347, "step": 785 }, { "clip_ratio/high_max": 0.0019698569376487285, "clip_ratio/high_mean": 0.000833251640870003, "clip_ratio/low_mean": 0.0009684636133897584, "clip_ratio/low_min": 3.361736889928579e-05, "clip_ratio/region_mean": 0.0018017152360698674, "epoch": 12.167930029154519, "grad_norm": 0.11735925078392029, "learning_rate": 1e-06, "loss": 0.0627, "step": 786 }, { "clip_ratio/high_max": 0.0023884439797257073, "clip_ratio/high_mean": 0.0010025802948803175, "clip_ratio/low_mean": 0.0010688419788493775, "clip_ratio/low_min": 8.836338929540943e-05, "clip_ratio/region_mean": 0.0020714222409878857, "epoch": 12.177259475218658, "grad_norm": 0.12652647495269775, "learning_rate": 1e-06, "loss": 0.0046, "step": 787 }, { "clip_ratio/high_max": 0.0021524110779864714, "clip_ratio/high_mean": 0.0008977284724096535, "clip_ratio/low_mean": 0.0009293914608861087, "clip_ratio/low_min": 3.6704934245790355e-05, "clip_ratio/region_mean": 0.0018271198860020377, "epoch": 12.186588921282798, "grad_norm": 0.1132396012544632, "learning_rate": 1e-06, "loss": 0.0223, "step": 788 }, { "clip_ratio/high_max": 0.0023530725593445823, "clip_ratio/high_mean": 0.0009946757309080567, "clip_ratio/low_mean": 0.0010905117524089292, "clip_ratio/low_min": 2.194672833866207e-05, "clip_ratio/region_mean": 0.002085187479679007, "epoch": 12.19591836734694, "grad_norm": 0.12985463440418243, "learning_rate": 1e-06, "loss": -0.0084, "step": 789 }, { "clip_ratio/high_max": 0.00261299560952466, "clip_ratio/high_mean": 0.0011101696763944346, "clip_ratio/low_mean": 0.0008699340041857795, "clip_ratio/low_min": 3.603508594096638e-05, "clip_ratio/region_mean": 0.0019801036396529526, "epoch": 12.205247813411079, "grad_norm": 0.1184607520699501, "learning_rate": 1e-06, "loss": -0.0285, "step": 790 }, { "clip_ratio/high_max": 0.002717594805289991, "clip_ratio/high_mean": 0.0011332717731420416, "clip_ratio/low_mean": 0.0010408923462819075, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021741640885011293, "epoch": 12.214577259475218, "grad_norm": 0.13032302260398865, "learning_rate": 1e-06, "loss": 0.0097, "step": 791 }, { "clip_ratio/high_max": 0.002858444830053486, "clip_ratio/high_mean": 0.0011575926982914098, "clip_ratio/low_mean": 0.0009146167885774048, "clip_ratio/low_min": 9.365544701722683e-05, "clip_ratio/region_mean": 0.002072209506877698, "epoch": 12.223906705539358, "grad_norm": 0.12457794696092606, "learning_rate": 1e-06, "loss": -0.0125, "step": 792 }, { "clip_ratio/high_max": 0.002127605323039461, "clip_ratio/high_mean": 0.0010039906974270707, "clip_ratio/low_mean": 0.0009453637940168846, "clip_ratio/low_min": 2.7352298275218345e-05, "clip_ratio/region_mean": 0.001949354467797093, "epoch": 12.2332361516035, "grad_norm": 0.11974716931581497, "learning_rate": 1e-06, "loss": -0.0246, "step": 793 }, { "clip_ratio/high_max": 0.0025850981910480186, "clip_ratio/high_mean": 0.0009408178066223627, "clip_ratio/low_mean": 0.0011635788177954964, "clip_ratio/low_min": 8.449673623545095e-05, "clip_ratio/region_mean": 0.0021043966480647214, "epoch": 12.242565597667639, "grad_norm": 0.1367463916540146, "learning_rate": 1e-06, "loss": 0.0294, "step": 794 }, { "clip_ratio/high_max": 0.0022464876601588912, "clip_ratio/high_mean": 0.0009718564178911038, "clip_ratio/low_mean": 0.0010182424794038525, "clip_ratio/low_min": 7.940767136460636e-05, "clip_ratio/region_mean": 0.0019900988991139457, "epoch": 12.251895043731778, "grad_norm": 0.1325341910123825, "learning_rate": 1e-06, "loss": -0.0359, "step": 795 }, { "clip_ratio/high_max": 0.002219196583610028, "clip_ratio/high_mean": 0.000921394939723541, "clip_ratio/low_mean": 0.001008641684165923, "clip_ratio/low_min": 3.4312379284529015e-05, "clip_ratio/region_mean": 0.0019300366402603686, "epoch": 12.261224489795918, "grad_norm": 0.11588510870933533, "learning_rate": 1e-06, "loss": 0.0114, "step": 796 }, { "clip_ratio/high_max": 0.0027079485953436233, "clip_ratio/high_mean": 0.0010720413556555286, "clip_ratio/low_mean": 0.000967370633588871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002039411996520357, "epoch": 12.270553935860057, "grad_norm": 0.13519005477428436, "learning_rate": 1e-06, "loss": 0.0123, "step": 797 }, { "clip_ratio/high_max": 0.0024604934587841853, "clip_ratio/high_mean": 0.0010963392996927723, "clip_ratio/low_mean": 0.0008216893675125903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019180286617483944, "epoch": 12.279883381924199, "grad_norm": 0.11680714040994644, "learning_rate": 1e-06, "loss": -0.0728, "step": 798 }, { "clip_ratio/high_max": 0.0023618988307134714, "clip_ratio/high_mean": 0.0009946611808118178, "clip_ratio/low_mean": 0.0012194803930469789, "clip_ratio/low_min": 8.814891043584794e-05, "clip_ratio/region_mean": 0.002214141597505659, "epoch": 12.289212827988338, "grad_norm": 0.13376952707767487, "learning_rate": 1e-06, "loss": 0.0463, "step": 799 }, { "clip_ratio/high_max": 0.0025391012877662433, "clip_ratio/high_mean": 0.001017661686091742, "clip_ratio/low_mean": 0.0010309930039511528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020486547145992517, "epoch": 12.298542274052478, "grad_norm": 0.12554509937763214, "learning_rate": 1e-06, "loss": -0.0009, "step": 800 }, { "clip_ratio/high_max": 0.002699669246794656, "clip_ratio/high_mean": 0.0011564240412553772, "clip_ratio/low_mean": 0.0009551375896990066, "clip_ratio/low_min": 4.852309211855754e-05, "clip_ratio/region_mean": 0.0021115616254974157, "epoch": 12.307871720116617, "grad_norm": 0.10621404647827148, "learning_rate": 1e-06, "loss": -0.0342, "step": 801 }, { "clip_ratio/high_max": 0.002637111676449422, "clip_ratio/high_mean": 0.0010181827656197129, "clip_ratio/low_mean": 0.0010148733017558698, "clip_ratio/low_min": 0.00013490813853422878, "clip_ratio/region_mean": 0.0020330560801085085, "epoch": 12.317201166180759, "grad_norm": 0.11737849563360214, "learning_rate": 1e-06, "loss": 0.0047, "step": 802 }, { "clip_ratio/high_max": 0.0028218860461493023, "clip_ratio/high_mean": 0.001119993376050843, "clip_ratio/low_mean": 0.000954437826294452, "clip_ratio/low_min": 3.095094598393189e-05, "clip_ratio/region_mean": 0.0020744312641909346, "epoch": 12.326530612244898, "grad_norm": 0.132485494017601, "learning_rate": 1e-06, "loss": -0.0059, "step": 803 }, { "clip_ratio/high_max": 0.002450728105031885, "clip_ratio/high_mean": 0.0009607499578123679, "clip_ratio/low_mean": 0.0008987103092295001, "clip_ratio/low_min": 1.2525049896794371e-05, "clip_ratio/region_mean": 0.0018594602515804581, "epoch": 12.335860058309038, "grad_norm": 0.11985781043767929, "learning_rate": 1e-06, "loss": -0.0088, "step": 804 }, { "clip_ratio/high_max": 0.0026656500267563388, "clip_ratio/high_mean": 0.001185635082947556, "clip_ratio/low_mean": 0.0009007929838844575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020864279867964797, "epoch": 12.345189504373177, "grad_norm": 0.11155782639980316, "learning_rate": 1e-06, "loss": -0.0509, "step": 805 }, { "clip_ratio/high_max": 0.00200622856937116, "clip_ratio/high_mean": 0.000978238083916949, "clip_ratio/low_mean": 0.0009624249269108986, "clip_ratio/low_min": 8.195697682822356e-05, "clip_ratio/region_mean": 0.001940663052664604, "epoch": 12.354518950437317, "grad_norm": 0.13074827194213867, "learning_rate": 1e-06, "loss": 0.027, "step": 806 }, { "clip_ratio/high_max": 0.0024923087330535054, "clip_ratio/high_mean": 0.0010706364992074668, "clip_ratio/low_mean": 0.0009327660391136305, "clip_ratio/low_min": 1.4737090168637224e-05, "clip_ratio/region_mean": 0.002003402536502108, "epoch": 12.363848396501458, "grad_norm": 0.1202230229973793, "learning_rate": 1e-06, "loss": -0.0184, "step": 807 }, { "clip_ratio/high_max": 0.0026743922499008477, "clip_ratio/high_mean": 0.0011361265987943625, "clip_ratio/low_mean": 0.0008625164191471413, "clip_ratio/low_min": 1.5992834960343316e-05, "clip_ratio/region_mean": 0.001998643001570599, "epoch": 12.373177842565598, "grad_norm": 0.12350955605506897, "learning_rate": 1e-06, "loss": -0.0033, "step": 808 }, { "clip_ratio/high_max": 0.0022155088154249825, "clip_ratio/high_mean": 0.000983483800155227, "clip_ratio/low_mean": 0.0010891106339840917, "clip_ratio/low_min": 0.00011132718009321252, "clip_ratio/region_mean": 0.0020725944777950644, "epoch": 12.382507288629737, "grad_norm": 0.12260499596595764, "learning_rate": 1e-06, "loss": 0.0162, "step": 809 }, { "clip_ratio/high_max": 0.00277569120953558, "clip_ratio/high_mean": 0.0012253020504431333, "clip_ratio/low_mean": 0.0010829724778886884, "clip_ratio/low_min": 6.794331875425996e-05, "clip_ratio/region_mean": 0.002308274539245758, "epoch": 12.391836734693877, "grad_norm": 0.12466733157634735, "learning_rate": 1e-06, "loss": -0.0023, "step": 810 }, { "clip_ratio/high_max": 0.0023973426868906245, "clip_ratio/high_mean": 0.0010406812180008274, "clip_ratio/low_mean": 0.0011516396698425524, "clip_ratio/low_min": 0.00010277959972881945, "clip_ratio/region_mean": 0.0021923209351371042, "epoch": 12.401166180758018, "grad_norm": 0.12773151695728302, "learning_rate": 1e-06, "loss": 0.0029, "step": 811 }, { "clip_ratio/high_max": 0.0024643362339702435, "clip_ratio/high_mean": 0.0009923261059157085, "clip_ratio/low_mean": 0.0009225446028722217, "clip_ratio/low_min": 1.9476472516544163e-05, "clip_ratio/region_mean": 0.001914870703330962, "epoch": 12.410495626822158, "grad_norm": 0.12679500877857208, "learning_rate": 1e-06, "loss": 0.0239, "step": 812 }, { "clip_ratio/high_max": 0.0028649861851590686, "clip_ratio/high_mean": 0.0012896785337943584, "clip_ratio/low_mean": 0.0008719073084648699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021615857767756097, "epoch": 12.419825072886297, "grad_norm": 0.12192143499851227, "learning_rate": 1e-06, "loss": -0.0683, "step": 813 }, { "clip_ratio/high_max": 0.0022357447451213375, "clip_ratio/high_mean": 0.001055348438967485, "clip_ratio/low_mean": 0.0010795788039104082, "clip_ratio/low_min": 3.757138620130718e-05, "clip_ratio/region_mean": 0.002134927264705766, "epoch": 12.429154518950437, "grad_norm": 0.11535592377185822, "learning_rate": 1e-06, "loss": -0.0201, "step": 814 }, { "clip_ratio/high_max": 0.00261023678467609, "clip_ratio/high_mean": 0.0010642307079251623, "clip_ratio/low_mean": 0.0012588719800987747, "clip_ratio/low_min": 0.0001318553877354134, "clip_ratio/region_mean": 0.0023231026498251595, "epoch": 12.438483965014576, "grad_norm": 0.1488509625196457, "learning_rate": 1e-06, "loss": 0.0388, "step": 815 }, { "clip_ratio/high_max": 0.002711935027036816, "clip_ratio/high_mean": 0.0011548065886017866, "clip_ratio/low_mean": 0.0011424495532992296, "clip_ratio/low_min": 0.00012122636508138385, "clip_ratio/region_mean": 0.0022972561346250586, "epoch": 12.447813411078718, "grad_norm": 0.11600733548402786, "learning_rate": 1e-06, "loss": -0.0042, "step": 816 }, { "clip_ratio/high_max": 0.002823381742928177, "clip_ratio/high_mean": 0.001177904719952494, "clip_ratio/low_mean": 0.0010691349270928185, "clip_ratio/low_min": 7.76533206590102e-05, "clip_ratio/region_mean": 0.0022470396434073336, "epoch": 12.457142857142857, "grad_norm": 0.126500204205513, "learning_rate": 1e-06, "loss": -0.0411, "step": 817 }, { "clip_ratio/high_max": 0.002562906942330301, "clip_ratio/high_mean": 0.0011726165867003147, "clip_ratio/low_mean": 0.0009087096295843367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020813262672163546, "epoch": 12.466472303206997, "grad_norm": 0.11181089282035828, "learning_rate": 1e-06, "loss": -0.0637, "step": 818 }, { "clip_ratio/high_max": 0.002906414258177392, "clip_ratio/high_mean": 0.0010576440254226327, "clip_ratio/low_mean": 0.0009552777355565922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002012921766436193, "epoch": 12.475801749271136, "grad_norm": 0.11564455181360245, "learning_rate": 1e-06, "loss": 0.006, "step": 819 }, { "clip_ratio/high_max": 0.002191215658967849, "clip_ratio/high_mean": 0.0010204338850599015, "clip_ratio/low_mean": 0.0012578281384776346, "clip_ratio/low_min": 7.845249638194218e-05, "clip_ratio/region_mean": 0.0022782620217185467, "epoch": 12.485131195335278, "grad_norm": 0.1232963427901268, "learning_rate": 1e-06, "loss": 0.0096, "step": 820 }, { "clip_ratio/high_max": 0.0027991028400720097, "clip_ratio/high_mean": 0.001012583195915795, "clip_ratio/low_mean": 0.0009260970527975587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019386802523513325, "epoch": 12.494460641399417, "grad_norm": 0.1163288801908493, "learning_rate": 1e-06, "loss": -0.0082, "step": 821 }, { "clip_ratio/high_max": 0.0027422422790550627, "clip_ratio/high_mean": 0.001195595152239548, "clip_ratio/low_mean": 0.0009774352020031074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002173030356061645, "epoch": 12.503790087463557, "grad_norm": 0.12730132043361664, "learning_rate": 1e-06, "loss": -0.0323, "step": 822 }, { "clip_ratio/high_max": 0.0025074906006921083, "clip_ratio/high_mean": 0.0011130772436445113, "clip_ratio/low_mean": 0.0010403667711216258, "clip_ratio/low_min": 0.0001829902485042112, "clip_ratio/region_mean": 0.0021534439001698047, "epoch": 12.513119533527696, "grad_norm": 0.1222425103187561, "learning_rate": 1e-06, "loss": -0.0127, "step": 823 }, { "clip_ratio/high_max": 0.0022122438240330666, "clip_ratio/high_mean": 0.0009195006714435294, "clip_ratio/low_mean": 0.0010709663856687257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001990467091673054, "epoch": 12.522448979591836, "grad_norm": 0.12297448515892029, "learning_rate": 1e-06, "loss": 0.025, "step": 824 }, { "clip_ratio/high_max": 0.00236048595252214, "clip_ratio/high_mean": 0.0010837599802471232, "clip_ratio/low_mean": 0.0010024465736933053, "clip_ratio/low_min": 6.323777051875368e-05, "clip_ratio/region_mean": 0.0020862065357505344, "epoch": 12.531778425655977, "grad_norm": 0.1306031346321106, "learning_rate": 1e-06, "loss": -0.0088, "step": 825 }, { "clip_ratio/high_max": 0.002812127182551194, "clip_ratio/high_mean": 0.0011292535746179055, "clip_ratio/low_mean": 0.0010131762010132661, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021424297883640975, "epoch": 12.541107871720117, "grad_norm": 0.11995650827884674, "learning_rate": 1e-06, "loss": -0.0123, "step": 826 }, { "clip_ratio/high_max": 0.0029836383764632046, "clip_ratio/high_mean": 0.001176291967567522, "clip_ratio/low_mean": 0.0012050632140017115, "clip_ratio/low_min": 5.442845485958969e-05, "clip_ratio/region_mean": 0.0023813552033971064, "epoch": 12.550437317784256, "grad_norm": 0.12899866700172424, "learning_rate": 1e-06, "loss": -0.0237, "step": 827 }, { "clip_ratio/high_max": 0.0027070367214037105, "clip_ratio/high_mean": 0.0011146233809995465, "clip_ratio/low_mean": 0.0011652918146864977, "clip_ratio/low_min": 8.443019032711163e-05, "clip_ratio/region_mean": 0.0022799151556682773, "epoch": 12.559766763848396, "grad_norm": 0.1434909552335739, "learning_rate": 1e-06, "loss": -0.0385, "step": 828 }, { "clip_ratio/high_max": 0.0032792178681120276, "clip_ratio/high_mean": 0.001310497567828861, "clip_ratio/low_mean": 0.001049492742822622, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00235999035066925, "epoch": 12.569096209912537, "grad_norm": 0.1292286515235901, "learning_rate": 1e-06, "loss": -0.0159, "step": 829 }, { "clip_ratio/high_max": 0.0026349084728281014, "clip_ratio/high_mean": 0.0012092284778191242, "clip_ratio/low_mean": 0.001106671250454383, "clip_ratio/low_min": 4.5870066969655454e-05, "clip_ratio/region_mean": 0.0023158997209975496, "epoch": 12.578425655976677, "grad_norm": 0.1191285103559494, "learning_rate": 1e-06, "loss": -0.0002, "step": 830 }, { "clip_ratio/high_max": 0.0027554127882467583, "clip_ratio/high_mean": 0.001035985453199828, "clip_ratio/low_mean": 0.0011049505956179928, "clip_ratio/low_min": 6.122842205513734e-05, "clip_ratio/region_mean": 0.002140936026989948, "epoch": 12.587755102040816, "grad_norm": 0.11864615976810455, "learning_rate": 1e-06, "loss": -0.0224, "step": 831 }, { "clip_ratio/high_max": 0.0022250700785662048, "clip_ratio/high_mean": 0.001027084690576885, "clip_ratio/low_mean": 0.0010864675286939018, "clip_ratio/low_min": 0.00010635959915816784, "clip_ratio/region_mean": 0.0021135521747055463, "epoch": 12.597084548104956, "grad_norm": 0.12421823292970657, "learning_rate": 1e-06, "loss": -0.0137, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0462123325892857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 715.6293334960938, "completions/mean_terminated_length": 551.8456420898438, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 13.00932944606414, "grad_norm": 0.13414518535137177, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 517420488.0, "reward": 0.632516086101532, "reward_std": 0.17390279471874237, "rewards/simpleverify_reward/mean": 0.6325160264968872, "rewards/simpleverify_reward/std": 0.4821240305900574, "step": 833 }, { "clip_ratio/high_max": 0.0019830406636174303, "clip_ratio/high_mean": 0.000829216519377951, "clip_ratio/low_mean": 0.000586779029617901, "clip_ratio/low_min": 4.197316684440011e-05, "clip_ratio/region_mean": 0.001415995524439495, "epoch": 13.018658892128279, "grad_norm": 0.12629295885562897, "learning_rate": 1e-06, "loss": 0.0185, "step": 834 }, { "clip_ratio/high_max": 0.002519905785447918, "clip_ratio/high_mean": 0.0009686950106697623, "clip_ratio/low_mean": 0.00043712686965591274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014058219057915267, "epoch": 13.02798833819242, "grad_norm": 0.1319560408592224, "learning_rate": 1e-06, "loss": -0.0305, "step": 835 }, { "clip_ratio/high_max": 0.0020775691082235426, "clip_ratio/high_mean": 0.0008421860475209542, "clip_ratio/low_mean": 0.00046514722089341376, "clip_ratio/low_min": 1.4977234968682751e-05, "clip_ratio/region_mean": 0.001307333259319421, "epoch": 13.03731778425656, "grad_norm": 0.13775719702243805, "learning_rate": 1e-06, "loss": -0.0125, "step": 836 }, { "clip_ratio/high_max": 0.0022130011893750634, "clip_ratio/high_mean": 0.0009837952857196797, "clip_ratio/low_mean": 0.0005082932439108845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001492088515078649, "epoch": 13.0466472303207, "grad_norm": 0.11993347108364105, "learning_rate": 1e-06, "loss": -0.0384, "step": 837 }, { "clip_ratio/high_max": 0.0018777957957354374, "clip_ratio/high_mean": 0.0007917759703559568, "clip_ratio/low_mean": 0.0006243171310416074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014160930950311013, "epoch": 13.055976676384839, "grad_norm": 0.12756216526031494, "learning_rate": 1e-06, "loss": 0.0073, "step": 838 }, { "clip_ratio/high_max": 0.002551495796069503, "clip_ratio/high_mean": 0.001109146967792185, "clip_ratio/low_mean": 0.0005956410623184638, "clip_ratio/low_min": 1.6399895685026422e-05, "clip_ratio/region_mean": 0.0017047880028258078, "epoch": 13.06530612244898, "grad_norm": 0.12596020102500916, "learning_rate": 1e-06, "loss": -0.0747, "step": 839 }, { "clip_ratio/high_max": 0.0025459724383836146, "clip_ratio/high_mean": 0.000972008281678427, "clip_ratio/low_mean": 0.0006664229513262399, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016384312220907304, "epoch": 13.07463556851312, "grad_norm": 0.12022960931062698, "learning_rate": 1e-06, "loss": 0.0079, "step": 840 }, { "clip_ratio/high_max": 0.002262051966681611, "clip_ratio/high_mean": 0.001028937302180566, "clip_ratio/low_mean": 0.00063529398994433, "clip_ratio/low_min": 2.2340045688906685e-05, "clip_ratio/region_mean": 0.0016642313203192316, "epoch": 13.08396501457726, "grad_norm": 0.12536147236824036, "learning_rate": 1e-06, "loss": -0.0695, "step": 841 }, { "clip_ratio/high_max": 0.0019748207269003615, "clip_ratio/high_mean": 0.0008613280915596988, "clip_ratio/low_mean": 0.0006797303367420682, "clip_ratio/low_min": 1.2124150998715777e-05, "clip_ratio/region_mean": 0.0015410584746859968, "epoch": 13.093294460641399, "grad_norm": 0.13648957014083862, "learning_rate": 1e-06, "loss": -0.005, "step": 842 }, { "clip_ratio/high_max": 0.0021365920729294885, "clip_ratio/high_mean": 0.0008052554057940142, "clip_ratio/low_mean": 0.00081346208571631, "clip_ratio/low_min": 3.0599756428273395e-05, "clip_ratio/region_mean": 0.0016187174915103242, "epoch": 13.102623906705539, "grad_norm": 0.10979168862104416, "learning_rate": 1e-06, "loss": 0.0112, "step": 843 }, { "clip_ratio/high_max": 0.00238527319743298, "clip_ratio/high_mean": 0.000985304759524297, "clip_ratio/low_mean": 0.0008162400827131933, "clip_ratio/low_min": 5.832644546899246e-05, "clip_ratio/region_mean": 0.0018015448222286068, "epoch": 13.11195335276968, "grad_norm": 0.13775122165679932, "learning_rate": 1e-06, "loss": -0.016, "step": 844 }, { "clip_ratio/high_max": 0.0024285401159431785, "clip_ratio/high_mean": 0.0010776535491459072, "clip_ratio/low_mean": 0.0009538254817016423, "clip_ratio/low_min": 3.557199670467526e-05, "clip_ratio/region_mean": 0.0020314791036071256, "epoch": 13.12128279883382, "grad_norm": 0.1199856624007225, "learning_rate": 1e-06, "loss": -0.0403, "step": 845 }, { "clip_ratio/high_max": 0.0021228468031040393, "clip_ratio/high_mean": 0.0009413827538082842, "clip_ratio/low_mean": 0.0008255953034677077, "clip_ratio/low_min": 3.4370357752777636e-05, "clip_ratio/region_mean": 0.0017669780645519495, "epoch": 13.130612244897959, "grad_norm": 0.12401924282312393, "learning_rate": 1e-06, "loss": -0.0354, "step": 846 }, { "clip_ratio/high_max": 0.0023706748979748227, "clip_ratio/high_mean": 0.0009638159781388822, "clip_ratio/low_mean": 0.0009370964025947615, "clip_ratio/low_min": 1.938585592142772e-05, "clip_ratio/region_mean": 0.001900912364362739, "epoch": 13.139941690962099, "grad_norm": 0.11542689800262451, "learning_rate": 1e-06, "loss": -0.0238, "step": 847 }, { "clip_ratio/high_max": 0.0022999602297204547, "clip_ratio/high_mean": 0.0009680519342509797, "clip_ratio/low_mean": 0.000854471565617132, "clip_ratio/low_min": 2.4925224352045916e-05, "clip_ratio/region_mean": 0.001822523510782048, "epoch": 13.14927113702624, "grad_norm": 0.11744103580713272, "learning_rate": 1e-06, "loss": -0.0344, "step": 848 }, { "clip_ratio/high_max": 0.0021957652061246336, "clip_ratio/high_mean": 0.0010111743104062043, "clip_ratio/low_mean": 0.0008652507840452017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001876425085356459, "epoch": 13.15860058309038, "grad_norm": 0.15531907975673676, "learning_rate": 1e-06, "loss": -0.0228, "step": 849 }, { "clip_ratio/high_max": 0.002185033998102881, "clip_ratio/high_mean": 0.0009567482593411114, "clip_ratio/low_mean": 0.0009446445146750193, "clip_ratio/low_min": 1.256533960258821e-05, "clip_ratio/region_mean": 0.001901392766740173, "epoch": 13.167930029154519, "grad_norm": 0.1306806057691574, "learning_rate": 1e-06, "loss": 0.0044, "step": 850 }, { "clip_ratio/high_max": 0.002366046766837826, "clip_ratio/high_mean": 0.0009903839863909525, "clip_ratio/low_mean": 0.0010551582599873655, "clip_ratio/low_min": 4.884605550614651e-05, "clip_ratio/region_mean": 0.002045542249106802, "epoch": 13.177259475218658, "grad_norm": 0.12941259145736694, "learning_rate": 1e-06, "loss": 0.0096, "step": 851 }, { "clip_ratio/high_max": 0.0019359269681444857, "clip_ratio/high_mean": 0.0008711113023309736, "clip_ratio/low_mean": 0.0010929880018011318, "clip_ratio/low_min": 9.626067276258254e-05, "clip_ratio/region_mean": 0.0019640991995402146, "epoch": 13.186588921282798, "grad_norm": 0.12935452163219452, "learning_rate": 1e-06, "loss": 0.0089, "step": 852 }, { "clip_ratio/high_max": 0.002536605370551115, "clip_ratio/high_mean": 0.0009768370000529103, "clip_ratio/low_mean": 0.0009711972415971104, "clip_ratio/low_min": 4.192036249150988e-05, "clip_ratio/region_mean": 0.0019480342816677876, "epoch": 13.19591836734694, "grad_norm": 0.12276334315538406, "learning_rate": 1e-06, "loss": -0.0215, "step": 853 }, { "clip_ratio/high_max": 0.002183154851081781, "clip_ratio/high_mean": 0.000942244058023789, "clip_ratio/low_mean": 0.0011278330639470369, "clip_ratio/low_min": 6.183659661473939e-05, "clip_ratio/region_mean": 0.0020700771565316245, "epoch": 13.205247813411079, "grad_norm": 0.12107101082801819, "learning_rate": 1e-06, "loss": 0.0097, "step": 854 }, { "clip_ratio/high_max": 0.002543337039242033, "clip_ratio/high_mean": 0.0009860966620180989, "clip_ratio/low_mean": 0.0009956289177353028, "clip_ratio/low_min": 2.192982537962962e-05, "clip_ratio/region_mean": 0.001981725567020476, "epoch": 13.214577259475218, "grad_norm": 0.12721332907676697, "learning_rate": 1e-06, "loss": -0.0085, "step": 855 }, { "clip_ratio/high_max": 0.0023810036436771043, "clip_ratio/high_mean": 0.0010373615623393562, "clip_ratio/low_mean": 0.00102116559719434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002058527199551463, "epoch": 13.223906705539358, "grad_norm": 0.12266834080219269, "learning_rate": 1e-06, "loss": -0.0433, "step": 856 }, { "clip_ratio/high_max": 0.002492389270628337, "clip_ratio/high_mean": 0.0009353009791084332, "clip_ratio/low_mean": 0.0009484468100708909, "clip_ratio/low_min": 2.4457052859361283e-05, "clip_ratio/region_mean": 0.0018837477909983136, "epoch": 13.2332361516035, "grad_norm": 0.14596796035766602, "learning_rate": 1e-06, "loss": -0.0007, "step": 857 }, { "clip_ratio/high_max": 0.002661496699147392, "clip_ratio/high_mean": 0.0012286690507608, "clip_ratio/low_mean": 0.0008065060428634752, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020351750499685295, "epoch": 13.242565597667639, "grad_norm": 0.12908431887626648, "learning_rate": 1e-06, "loss": -0.0752, "step": 858 }, { "clip_ratio/high_max": 0.0025324390226160176, "clip_ratio/high_mean": 0.0010505361897230614, "clip_ratio/low_mean": 0.001047112589731114, "clip_ratio/low_min": 7.459139942511683e-05, "clip_ratio/region_mean": 0.002097648808558006, "epoch": 13.251895043731778, "grad_norm": 0.12802553176879883, "learning_rate": 1e-06, "loss": -0.0043, "step": 859 }, { "clip_ratio/high_max": 0.0023204233293654397, "clip_ratio/high_mean": 0.001041752941091545, "clip_ratio/low_mean": 0.0009563090334268054, "clip_ratio/low_min": 5.52955661987653e-05, "clip_ratio/region_mean": 0.001998061918129679, "epoch": 13.261224489795918, "grad_norm": 0.13391587138175964, "learning_rate": 1e-06, "loss": -0.0129, "step": 860 }, { "clip_ratio/high_max": 0.002237422355392482, "clip_ratio/high_mean": 0.0009927220271492843, "clip_ratio/low_mean": 0.001034128505125409, "clip_ratio/low_min": 1.8121194443665445e-05, "clip_ratio/region_mean": 0.0020268505613785237, "epoch": 13.270553935860057, "grad_norm": 0.12003553658723831, "learning_rate": 1e-06, "loss": -0.0107, "step": 861 }, { "clip_ratio/high_max": 0.0026718492299551144, "clip_ratio/high_mean": 0.0011057402061851462, "clip_ratio/low_mean": 0.000998317602352472, "clip_ratio/low_min": 2.183024844271131e-05, "clip_ratio/region_mean": 0.0021040578503743745, "epoch": 13.279883381924199, "grad_norm": 0.11906471103429794, "learning_rate": 1e-06, "loss": -0.0058, "step": 862 }, { "clip_ratio/high_max": 0.002436835922708269, "clip_ratio/high_mean": 0.0011377226146578323, "clip_ratio/low_mean": 0.0011466565392765915, "clip_ratio/low_min": 6.766712613170967e-05, "clip_ratio/region_mean": 0.0022843791739433073, "epoch": 13.289212827988338, "grad_norm": 0.13598018884658813, "learning_rate": 1e-06, "loss": -0.032, "step": 863 }, { "clip_ratio/high_max": 0.00240250728893443, "clip_ratio/high_mean": 0.0009240321760444203, "clip_ratio/low_mean": 0.0013703959302802105, "clip_ratio/low_min": 5.9777506976388395e-05, "clip_ratio/region_mean": 0.00229442810814362, "epoch": 13.298542274052478, "grad_norm": 0.12569187581539154, "learning_rate": 1e-06, "loss": 0.0154, "step": 864 }, { "clip_ratio/high_max": 0.0024366669458686374, "clip_ratio/high_mean": 0.0011648315248748986, "clip_ratio/low_mean": 0.001137299001129577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023021304805297405, "epoch": 13.307871720116617, "grad_norm": 0.12890926003456116, "learning_rate": 1e-06, "loss": -0.0111, "step": 865 }, { "clip_ratio/high_max": 0.0026204201349173672, "clip_ratio/high_mean": 0.0010493731751921587, "clip_ratio/low_mean": 0.0010249173756164964, "clip_ratio/low_min": 7.662937423447147e-05, "clip_ratio/region_mean": 0.0020742905762745067, "epoch": 13.317201166180759, "grad_norm": 0.12786240875720978, "learning_rate": 1e-06, "loss": -0.0215, "step": 866 }, { "clip_ratio/high_max": 0.0025590010554878972, "clip_ratio/high_mean": 0.0010956065634672996, "clip_ratio/low_mean": 0.0010608637512632413, "clip_ratio/low_min": 2.8189984732307494e-05, "clip_ratio/region_mean": 0.0021564703274634667, "epoch": 13.326530612244898, "grad_norm": 0.130562424659729, "learning_rate": 1e-06, "loss": -0.0255, "step": 867 }, { "clip_ratio/high_max": 0.0033513108792249113, "clip_ratio/high_mean": 0.001242486308910884, "clip_ratio/low_mean": 0.001135861690272577, "clip_ratio/low_min": 9.898459393298253e-05, "clip_ratio/region_mean": 0.002378347904596012, "epoch": 13.335860058309038, "grad_norm": 0.14444439113140106, "learning_rate": 1e-06, "loss": 0.0276, "step": 868 }, { "clip_ratio/high_max": 0.003126221774436999, "clip_ratio/high_mean": 0.001074210802471498, "clip_ratio/low_mean": 0.0009838177684287075, "clip_ratio/low_min": 4.917968726658728e-05, "clip_ratio/region_mean": 0.002058028578176163, "epoch": 13.345189504373177, "grad_norm": 0.12873801589012146, "learning_rate": 1e-06, "loss": -0.026, "step": 869 }, { "clip_ratio/high_max": 0.0026007405613199808, "clip_ratio/high_mean": 0.0009901737230393337, "clip_ratio/low_mean": 0.0011166982949362136, "clip_ratio/low_min": 4.587155854096636e-05, "clip_ratio/region_mean": 0.0021068719725008123, "epoch": 13.354518950437317, "grad_norm": 0.12822173535823822, "learning_rate": 1e-06, "loss": 0.0314, "step": 870 }, { "clip_ratio/high_max": 0.0028998194757150486, "clip_ratio/high_mean": 0.001148977673437912, "clip_ratio/low_mean": 0.0009931444365065545, "clip_ratio/low_min": 3.6829504097113386e-05, "clip_ratio/region_mean": 0.0021421221099444665, "epoch": 13.363848396501458, "grad_norm": 0.12017261981964111, "learning_rate": 1e-06, "loss": -0.0332, "step": 871 }, { "clip_ratio/high_max": 0.002861479646526277, "clip_ratio/high_mean": 0.0011401223673601635, "clip_ratio/low_mean": 0.001102754142266349, "clip_ratio/low_min": 4.90837701363489e-05, "clip_ratio/region_mean": 0.002242876493255608, "epoch": 13.373177842565598, "grad_norm": 0.12584428489208221, "learning_rate": 1e-06, "loss": 0.0115, "step": 872 }, { "clip_ratio/high_max": 0.002275574619488907, "clip_ratio/high_mean": 0.0010041766422546061, "clip_ratio/low_mean": 0.0010493665486137616, "clip_ratio/low_min": 4.434720904100686e-05, "clip_ratio/region_mean": 0.002053543126748991, "epoch": 13.382507288629737, "grad_norm": 0.12351217120885849, "learning_rate": 1e-06, "loss": -0.0225, "step": 873 }, { "clip_ratio/high_max": 0.0027673511940520257, "clip_ratio/high_mean": 0.0013027097847952973, "clip_ratio/low_mean": 0.0009989740756282117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002301683896803297, "epoch": 13.391836734693877, "grad_norm": 0.1255517154932022, "learning_rate": 1e-06, "loss": -0.0638, "step": 874 }, { "clip_ratio/high_max": 0.002537508604291361, "clip_ratio/high_mean": 0.0011440946836955845, "clip_ratio/low_mean": 0.0010802688193507493, "clip_ratio/low_min": 1.6747051631682552e-05, "clip_ratio/region_mean": 0.0022243634884944186, "epoch": 13.401166180758018, "grad_norm": 0.13426025211811066, "learning_rate": 1e-06, "loss": 0.0029, "step": 875 }, { "clip_ratio/high_max": 0.0023940327009768225, "clip_ratio/high_mean": 0.0009865128122328315, "clip_ratio/low_mean": 0.001143872446846217, "clip_ratio/low_min": 0.00011511968023114605, "clip_ratio/region_mean": 0.002130385211785324, "epoch": 13.410495626822158, "grad_norm": 0.13342955708503723, "learning_rate": 1e-06, "loss": 0.0557, "step": 876 }, { "clip_ratio/high_max": 0.0021587336450465955, "clip_ratio/high_mean": 0.0009314640647062333, "clip_ratio/low_mean": 0.0009676771696831565, "clip_ratio/low_min": 3.904512686858652e-05, "clip_ratio/region_mean": 0.0018991412507602945, "epoch": 13.419825072886297, "grad_norm": 169.31161499023438, "learning_rate": 1e-06, "loss": -0.0246, "step": 877 }, { "clip_ratio/high_max": 0.0026572888673399575, "clip_ratio/high_mean": 0.0011478262495074887, "clip_ratio/low_mean": 0.0009573229472152889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021051491421530955, "epoch": 13.429154518950437, "grad_norm": 0.12571832537651062, "learning_rate": 1e-06, "loss": -0.0261, "step": 878 }, { "clip_ratio/high_max": 0.0026892770220001694, "clip_ratio/high_mean": 0.0011772154048230732, "clip_ratio/low_mean": 0.00111311878390552, "clip_ratio/low_min": 0.00010643437963153701, "clip_ratio/region_mean": 0.0022903342178324237, "epoch": 13.438483965014576, "grad_norm": 0.12426993250846863, "learning_rate": 1e-06, "loss": -0.0212, "step": 879 }, { "clip_ratio/high_max": 0.0029829922132194042, "clip_ratio/high_mean": 0.0013295427525008563, "clip_ratio/low_mean": 0.0010098710590682458, "clip_ratio/low_min": 6.025064431014471e-05, "clip_ratio/region_mean": 0.0023394138042931445, "epoch": 13.447813411078718, "grad_norm": 0.13399332761764526, "learning_rate": 1e-06, "loss": -0.0399, "step": 880 }, { "clip_ratio/high_max": 0.0030106031917966902, "clip_ratio/high_mean": 0.0011709192658599932, "clip_ratio/low_mean": 0.0010956884179904591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022666076765744947, "epoch": 13.457142857142857, "grad_norm": 0.11155635118484497, "learning_rate": 1e-06, "loss": -0.0429, "step": 881 }, { "clip_ratio/high_max": 0.002703793994442094, "clip_ratio/high_mean": 0.0013057460164418444, "clip_ratio/low_mean": 0.0011353579229762545, "clip_ratio/low_min": 2.9592803912237287e-05, "clip_ratio/region_mean": 0.0024411039194092155, "epoch": 13.466472303206997, "grad_norm": 0.19284729659557343, "learning_rate": 1e-06, "loss": -0.0232, "step": 882 }, { "clip_ratio/high_max": 0.0027425165462773293, "clip_ratio/high_mean": 0.001239390367118176, "clip_ratio/low_mean": 0.0011183500464539975, "clip_ratio/low_min": 2.1777002984890714e-05, "clip_ratio/region_mean": 0.002357740420848131, "epoch": 13.475801749271136, "grad_norm": 0.12508141994476318, "learning_rate": 1e-06, "loss": -0.0114, "step": 883 }, { "clip_ratio/high_max": 0.0029979851315147243, "clip_ratio/high_mean": 0.001300359141168883, "clip_ratio/low_mean": 0.0013067465224594343, "clip_ratio/low_min": 8.133424853440374e-05, "clip_ratio/region_mean": 0.002607105576316826, "epoch": 13.485131195335278, "grad_norm": 0.1267627328634262, "learning_rate": 1e-06, "loss": -0.0156, "step": 884 }, { "clip_ratio/high_max": 0.003407486656215042, "clip_ratio/high_mean": 0.0013521736182156019, "clip_ratio/low_mean": 0.0011613209499046206, "clip_ratio/low_min": 4.535382322501391e-05, "clip_ratio/region_mean": 0.0025134946263278835, "epoch": 13.494460641399417, "grad_norm": 0.12223848700523376, "learning_rate": 1e-06, "loss": -0.0419, "step": 885 }, { "clip_ratio/high_max": 0.0033824653874034993, "clip_ratio/high_mean": 0.0014186567859724164, "clip_ratio/low_mean": 0.0012371748562145513, "clip_ratio/low_min": 1.256533960258821e-05, "clip_ratio/region_mean": 0.0026558317040326074, "epoch": 13.503790087463557, "grad_norm": 0.13529939949512482, "learning_rate": 1e-06, "loss": -0.0143, "step": 886 }, { "clip_ratio/high_max": 0.0028677445152425207, "clip_ratio/high_mean": 0.001251653406143305, "clip_ratio/low_mean": 0.00124818472431798, "clip_ratio/low_min": 9.836300978349755e-05, "clip_ratio/region_mean": 0.0024998381777550094, "epoch": 13.513119533527696, "grad_norm": 0.12827137112617493, "learning_rate": 1e-06, "loss": -0.0363, "step": 887 }, { "clip_ratio/high_max": 0.002635695884237066, "clip_ratio/high_mean": 0.0011737718996300828, "clip_ratio/low_mean": 0.0012570931066875346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024308649881277233, "epoch": 13.522448979591836, "grad_norm": 0.1189485639333725, "learning_rate": 1e-06, "loss": -0.0117, "step": 888 }, { "clip_ratio/high_max": 0.0027452050198917277, "clip_ratio/high_mean": 0.00115804493361793, "clip_ratio/low_mean": 0.0012230700776854064, "clip_ratio/low_min": 5.9580554079730064e-05, "clip_ratio/region_mean": 0.0023811150022083893, "epoch": 13.531778425655977, "grad_norm": 0.12266857922077179, "learning_rate": 1e-06, "loss": 0.0057, "step": 889 }, { "clip_ratio/high_max": 0.0028218238439876586, "clip_ratio/high_mean": 0.0012595596017490607, "clip_ratio/low_mean": 0.0015888533744146116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002848412965249736, "epoch": 13.541107871720117, "grad_norm": 0.22287705540657043, "learning_rate": 1e-06, "loss": 0.0243, "step": 890 }, { "clip_ratio/high_max": 0.00284343878593063, "clip_ratio/high_mean": 0.0011793407102231868, "clip_ratio/low_mean": 0.0012273779684619512, "clip_ratio/low_min": 5.764590241597034e-05, "clip_ratio/region_mean": 0.0024067187041509897, "epoch": 13.550437317784256, "grad_norm": 0.13702525198459625, "learning_rate": 1e-06, "loss": -0.0187, "step": 891 }, { "clip_ratio/high_max": 0.0022639693816017825, "clip_ratio/high_mean": 0.0010550611314101843, "clip_ratio/low_mean": 0.0012934463629790116, "clip_ratio/low_min": 0.00016416675498476252, "clip_ratio/region_mean": 0.002348507525312016, "epoch": 13.559766763848396, "grad_norm": 0.13432511687278748, "learning_rate": 1e-06, "loss": -0.005, "step": 892 }, { "clip_ratio/high_max": 0.0024704822135390714, "clip_ratio/high_mean": 0.0011700084432959557, "clip_ratio/low_mean": 0.0015362148824351607, "clip_ratio/low_min": 2.7103209504275583e-05, "clip_ratio/region_mean": 0.002706223356653936, "epoch": 13.569096209912537, "grad_norm": 0.1321430504322052, "learning_rate": 1e-06, "loss": 0.0151, "step": 893 }, { "clip_ratio/high_max": 0.0029745934589300305, "clip_ratio/high_mean": 0.0011174059800396208, "clip_ratio/low_mean": 0.0013322115100891097, "clip_ratio/low_min": 2.7630416298052296e-05, "clip_ratio/region_mean": 0.0024496175174135715, "epoch": 13.578425655976677, "grad_norm": 0.12417371571063995, "learning_rate": 1e-06, "loss": -0.0011, "step": 894 }, { "clip_ratio/high_max": 0.0031292916391976178, "clip_ratio/high_mean": 0.0013402972799667623, "clip_ratio/low_mean": 0.0013383482728386298, "clip_ratio/low_min": 4.3599582568276674e-05, "clip_ratio/region_mean": 0.002678645570995286, "epoch": 13.587755102040816, "grad_norm": 0.14334237575531006, "learning_rate": 1e-06, "loss": -0.0505, "step": 895 }, { "clip_ratio/high_max": 0.00333062898425851, "clip_ratio/high_mean": 0.001381813061016146, "clip_ratio/low_mean": 0.0014490344256046228, "clip_ratio/low_min": 0.0001234303508681478, "clip_ratio/region_mean": 0.0028308474211371504, "epoch": 13.597084548104956, "grad_norm": 0.13221228122711182, "learning_rate": 1e-06, "loss": -0.0132, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0567103794642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 755.4510498046875, "completions/mean_terminated_length": 554.6179809570312, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 14.00932944606414, "grad_norm": 0.14969795942306519, "learning_rate": 1e-06, "loss": -0.0266, "num_tokens": 553113682.0, "reward": 0.6208147406578064, "reward_std": 0.17625229060649872, "rewards/simpleverify_reward/mean": 0.6208147406578064, "rewards/simpleverify_reward/std": 0.4851885437965393, "step": 897 }, { "clip_ratio/high_max": 0.0021321147651178762, "clip_ratio/high_mean": 0.0009024411319842329, "clip_ratio/low_mean": 0.0004685198714469152, "clip_ratio/low_min": 1.8529499357100576e-05, "clip_ratio/region_mean": 0.0013709610102523584, "epoch": 14.018658892128279, "grad_norm": 0.13002531230449677, "learning_rate": 1e-06, "loss": -0.0539, "step": 898 }, { "clip_ratio/high_max": 0.0022830904053989798, "clip_ratio/high_mean": 0.0009456026236875914, "clip_ratio/low_mean": 0.0005751479429818573, "clip_ratio/low_min": 2.8988868507440202e-05, "clip_ratio/region_mean": 0.001520750480267452, "epoch": 14.02798833819242, "grad_norm": 0.13815423846244812, "learning_rate": 1e-06, "loss": -0.0121, "step": 899 }, { "clip_ratio/high_max": 0.0021398777134891134, "clip_ratio/high_mean": 0.0009047834009834332, "clip_ratio/low_mean": 0.000516072619575425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014208560387487523, "epoch": 14.03731778425656, "grad_norm": 0.13017743825912476, "learning_rate": 1e-06, "loss": -0.0709, "step": 900 }, { "clip_ratio/high_max": 0.001989206466532778, "clip_ratio/high_mean": 0.0008325697745021898, "clip_ratio/low_mean": 0.0005534265819733264, "clip_ratio/low_min": 5.677307581208879e-05, "clip_ratio/region_mean": 0.0013859963473805692, "epoch": 14.0466472303207, "grad_norm": 0.12986066937446594, "learning_rate": 1e-06, "loss": -0.0001, "step": 901 }, { "clip_ratio/high_max": 0.0024154558777809143, "clip_ratio/high_mean": 0.000894890799827408, "clip_ratio/low_mean": 0.0005908537896175403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014857446039968636, "epoch": 14.055976676384839, "grad_norm": 0.14485520124435425, "learning_rate": 1e-06, "loss": -0.012, "step": 902 }, { "clip_ratio/high_max": 0.002370783913647756, "clip_ratio/high_mean": 0.0009642664736020379, "clip_ratio/low_mean": 0.0007347641367232427, "clip_ratio/low_min": 4.8053605496534146e-05, "clip_ratio/region_mean": 0.0016990305957733653, "epoch": 14.06530612244898, "grad_norm": 0.14210668206214905, "learning_rate": 1e-06, "loss": -0.0205, "step": 903 }, { "clip_ratio/high_max": 0.0022944658921915106, "clip_ratio/high_mean": 0.0009390147315571085, "clip_ratio/low_mean": 0.0006571683288711938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015961830504238605, "epoch": 14.07463556851312, "grad_norm": 0.12780068814754486, "learning_rate": 1e-06, "loss": -0.0047, "step": 904 }, { "clip_ratio/high_max": 0.0020741868356708437, "clip_ratio/high_mean": 0.0009202171459037345, "clip_ratio/low_mean": 0.0006916583797647036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016118755338538904, "epoch": 14.08396501457726, "grad_norm": 0.13621775805950165, "learning_rate": 1e-06, "loss": -0.0249, "step": 905 }, { "clip_ratio/high_max": 0.002060031853034161, "clip_ratio/high_mean": 0.0008378832617381704, "clip_ratio/low_mean": 0.0006564872364833718, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014943705027690157, "epoch": 14.093294460641399, "grad_norm": 0.12371636927127838, "learning_rate": 1e-06, "loss": 0.0006, "step": 906 }, { "clip_ratio/high_max": 0.0022291329005383886, "clip_ratio/high_mean": 0.000812384401797317, "clip_ratio/low_mean": 0.000691331773850834, "clip_ratio/low_min": 2.2045855075703003e-05, "clip_ratio/region_mean": 0.0015037161865620874, "epoch": 14.102623906705539, "grad_norm": 0.11786044389009476, "learning_rate": 1e-06, "loss": -0.0108, "step": 907 }, { "clip_ratio/high_max": 0.0022796319535700604, "clip_ratio/high_mean": 0.0009864562343864236, "clip_ratio/low_mean": 0.0008921745311454288, "clip_ratio/low_min": 2.2316601643979084e-05, "clip_ratio/region_mean": 0.0018786307737173047, "epoch": 14.11195335276968, "grad_norm": 0.14417092502117157, "learning_rate": 1e-06, "loss": -0.0218, "step": 908 }, { "clip_ratio/high_max": 0.002153070417989511, "clip_ratio/high_mean": 0.0009623824680602411, "clip_ratio/low_mean": 0.0008726408050279133, "clip_ratio/low_min": 6.024687354511116e-05, "clip_ratio/region_mean": 0.001835023271269165, "epoch": 14.12128279883382, "grad_norm": 0.12148705869913101, "learning_rate": 1e-06, "loss": 0.0134, "step": 909 }, { "clip_ratio/high_max": 0.001995118276681751, "clip_ratio/high_mean": 0.0008352095665031811, "clip_ratio/low_mean": 0.0008332733141287463, "clip_ratio/low_min": 4.991334481019294e-05, "clip_ratio/region_mean": 0.0016684829024598002, "epoch": 14.130612244897959, "grad_norm": 0.12048781663179398, "learning_rate": 1e-06, "loss": -0.0127, "step": 910 }, { "clip_ratio/high_max": 0.0025849801531876437, "clip_ratio/high_mean": 0.0011180993606103584, "clip_ratio/low_mean": 0.0007752993515168782, "clip_ratio/low_min": 3.476073470665142e-05, "clip_ratio/region_mean": 0.0018933986648335122, "epoch": 14.139941690962099, "grad_norm": 0.12970127165317535, "learning_rate": 1e-06, "loss": -0.0652, "step": 911 }, { "clip_ratio/high_max": 0.002135657730832463, "clip_ratio/high_mean": 0.0008769598407525336, "clip_ratio/low_mean": 0.0009745557708811248, "clip_ratio/low_min": 6.452128764067311e-05, "clip_ratio/region_mean": 0.0018515156116336584, "epoch": 14.14927113702624, "grad_norm": 0.11937867105007172, "learning_rate": 1e-06, "loss": 0.0028, "step": 912 }, { "clip_ratio/high_max": 0.001953250155565911, "clip_ratio/high_mean": 0.0008245341159636155, "clip_ratio/low_mean": 0.0010529869605306885, "clip_ratio/low_min": 5.208333459449932e-05, "clip_ratio/region_mean": 0.0018775210191961378, "epoch": 14.15860058309038, "grad_norm": 0.11515150964260101, "learning_rate": 1e-06, "loss": 0.0128, "step": 913 }, { "clip_ratio/high_max": 0.002455089539580513, "clip_ratio/high_mean": 0.0010868431236303877, "clip_ratio/low_mean": 0.0009100359884541831, "clip_ratio/low_min": 8.621878805570304e-06, "clip_ratio/region_mean": 0.001996879102080129, "epoch": 14.167930029154519, "grad_norm": 0.12120584398508072, "learning_rate": 1e-06, "loss": -0.0255, "step": 914 }, { "clip_ratio/high_max": 0.0023854992068663705, "clip_ratio/high_mean": 0.0010666750276868697, "clip_ratio/low_mean": 0.0009492768058407819, "clip_ratio/low_min": 3.2316442229785025e-05, "clip_ratio/region_mean": 0.00201595183898462, "epoch": 14.177259475218658, "grad_norm": 0.13539746403694153, "learning_rate": 1e-06, "loss": -0.0224, "step": 915 }, { "clip_ratio/high_max": 0.0022336070323945023, "clip_ratio/high_mean": 0.000979129787083366, "clip_ratio/low_mean": 0.0010042173016699962, "clip_ratio/low_min": 4.6387925976887345e-05, "clip_ratio/region_mean": 0.0019833470360026695, "epoch": 14.186588921282798, "grad_norm": 0.11462336033582687, "learning_rate": 1e-06, "loss": -0.0209, "step": 916 }, { "clip_ratio/high_max": 0.002345109787711408, "clip_ratio/high_mean": 0.0009678079386503669, "clip_ratio/low_mean": 0.0011101311465608887, "clip_ratio/low_min": 0.00016861287076608278, "clip_ratio/region_mean": 0.002077939127048012, "epoch": 14.19591836734694, "grad_norm": 0.13211119174957275, "learning_rate": 1e-06, "loss": 0.0308, "step": 917 }, { "clip_ratio/high_max": 0.0022739557971362956, "clip_ratio/high_mean": 0.0009184006657960708, "clip_ratio/low_mean": 0.000950915860812529, "clip_ratio/low_min": 3.9835543248045724e-05, "clip_ratio/region_mean": 0.0018693165038712323, "epoch": 14.205247813411079, "grad_norm": 0.13406263291835785, "learning_rate": 1e-06, "loss": 0.0035, "step": 918 }, { "clip_ratio/high_max": 0.002644911073730327, "clip_ratio/high_mean": 0.0010202525809290819, "clip_ratio/low_mean": 0.001061573395418236, "clip_ratio/low_min": 4.7071792323549744e-05, "clip_ratio/region_mean": 0.002081826009089127, "epoch": 14.214577259475218, "grad_norm": 0.1266704946756363, "learning_rate": 1e-06, "loss": -0.0083, "step": 919 }, { "clip_ratio/high_max": 0.0023664222608204, "clip_ratio/high_mean": 0.001073367559001781, "clip_ratio/low_mean": 0.0011838728933071252, "clip_ratio/low_min": 0.00015215515668387525, "clip_ratio/region_mean": 0.0022572404996026307, "epoch": 14.223906705539358, "grad_norm": 0.19711726903915405, "learning_rate": 1e-06, "loss": -0.0265, "step": 920 }, { "clip_ratio/high_max": 0.0026139828405575827, "clip_ratio/high_mean": 0.0011277917292318307, "clip_ratio/low_mean": 0.0009537962541799061, "clip_ratio/low_min": 3.863409256155137e-05, "clip_ratio/region_mean": 0.002081587983411737, "epoch": 14.2332361516035, "grad_norm": 0.1462983787059784, "learning_rate": 1e-06, "loss": -0.0053, "step": 921 }, { "clip_ratio/high_max": 0.0030707967816852033, "clip_ratio/high_mean": 0.0013405666541075334, "clip_ratio/low_mean": 0.0007491079650208121, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002089674620947335, "epoch": 14.242565597667639, "grad_norm": 0.14318053424358368, "learning_rate": 1e-06, "loss": -0.0822, "step": 922 }, { "clip_ratio/high_max": 0.002247482319944538, "clip_ratio/high_mean": 0.0010654392408468993, "clip_ratio/low_mean": 0.0009853786941675935, "clip_ratio/low_min": 6.670921356999315e-05, "clip_ratio/region_mean": 0.002050817958661355, "epoch": 14.251895043731778, "grad_norm": 0.12203782051801682, "learning_rate": 1e-06, "loss": -0.032, "step": 923 }, { "clip_ratio/high_max": 0.002335211720492225, "clip_ratio/high_mean": 0.001002703960693907, "clip_ratio/low_mean": 0.0009630866152292583, "clip_ratio/low_min": 3.251755333621986e-05, "clip_ratio/region_mean": 0.001965790521353483, "epoch": 14.261224489795918, "grad_norm": 0.1335146427154541, "learning_rate": 1e-06, "loss": -0.0164, "step": 924 }, { "clip_ratio/high_max": 0.002520498019293882, "clip_ratio/high_mean": 0.0010447496115375543, "clip_ratio/low_mean": 0.001091637819627067, "clip_ratio/low_min": 8.63748691699584e-05, "clip_ratio/region_mean": 0.002136387411155738, "epoch": 14.270553935860057, "grad_norm": 0.13873517513275146, "learning_rate": 1e-06, "loss": -0.0168, "step": 925 }, { "clip_ratio/high_max": 0.002843659560312517, "clip_ratio/high_mean": 0.0011851942908833735, "clip_ratio/low_mean": 0.0010693122567317914, "clip_ratio/low_min": 3.4054914067382924e-05, "clip_ratio/region_mean": 0.002254506529425271, "epoch": 14.279883381924199, "grad_norm": 0.13015536963939667, "learning_rate": 1e-06, "loss": 0.0139, "step": 926 }, { "clip_ratio/high_max": 0.0022768777307646815, "clip_ratio/high_mean": 0.0010951667118206387, "clip_ratio/low_mean": 0.0010495628375792876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021447295221150853, "epoch": 14.289212827988338, "grad_norm": 0.126378133893013, "learning_rate": 1e-06, "loss": -0.0296, "step": 927 }, { "clip_ratio/high_max": 0.0024771498938207515, "clip_ratio/high_mean": 0.0010968905589834321, "clip_ratio/low_mean": 0.000995513319139718, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020924038835801184, "epoch": 14.298542274052478, "grad_norm": 0.12050725519657135, "learning_rate": 1e-06, "loss": -0.0201, "step": 928 }, { "clip_ratio/high_max": 0.0022828545479569584, "clip_ratio/high_mean": 0.0010056908104161266, "clip_ratio/low_mean": 0.0010011418344220147, "clip_ratio/low_min": 4.3749618271249346e-05, "clip_ratio/region_mean": 0.0020068326630280353, "epoch": 14.307871720116617, "grad_norm": 0.13950660824775696, "learning_rate": 1e-06, "loss": -0.0087, "step": 929 }, { "clip_ratio/high_max": 0.002354379110329319, "clip_ratio/high_mean": 0.0009210168664139928, "clip_ratio/low_mean": 0.001131519387854496, "clip_ratio/low_min": 5.436201990960399e-05, "clip_ratio/region_mean": 0.002052536314295139, "epoch": 14.317201166180759, "grad_norm": 0.12580522894859314, "learning_rate": 1e-06, "loss": 0.0346, "step": 930 }, { "clip_ratio/high_max": 0.00236603675148217, "clip_ratio/high_mean": 0.0009832563082454726, "clip_ratio/low_mean": 0.0010800404124893248, "clip_ratio/low_min": 6.691541420877911e-05, "clip_ratio/region_mean": 0.00206329671345884, "epoch": 14.326530612244898, "grad_norm": 0.13978849351406097, "learning_rate": 1e-06, "loss": 0.0174, "step": 931 }, { "clip_ratio/high_max": 0.0024994046543724835, "clip_ratio/high_mean": 0.0010441215763421496, "clip_ratio/low_mean": 0.0010637633786245715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021078849022160284, "epoch": 14.335860058309038, "grad_norm": 0.12466134876012802, "learning_rate": 1e-06, "loss": -0.0146, "step": 932 }, { "clip_ratio/high_max": 0.0025941191852325574, "clip_ratio/high_mean": 0.0010674317891243845, "clip_ratio/low_mean": 0.0012813621251552831, "clip_ratio/low_min": 0.00015002742475189734, "clip_ratio/region_mean": 0.002348794019781053, "epoch": 14.345189504373177, "grad_norm": 0.13128824532032013, "learning_rate": 1e-06, "loss": -0.0697, "step": 933 }, { "clip_ratio/high_max": 0.002672317343240138, "clip_ratio/high_mean": 0.0010759210090327542, "clip_ratio/low_mean": 0.0009295324853155762, "clip_ratio/low_min": 6.443041093007196e-05, "clip_ratio/region_mean": 0.002005453541642055, "epoch": 14.354518950437317, "grad_norm": 0.12702219188213348, "learning_rate": 1e-06, "loss": -0.0073, "step": 934 }, { "clip_ratio/high_max": 0.0025854838095256127, "clip_ratio/high_mean": 0.0011564231426746119, "clip_ratio/low_mean": 0.000901870291272644, "clip_ratio/low_min": 6.69623950670939e-05, "clip_ratio/region_mean": 0.00205829345213715, "epoch": 14.363848396501458, "grad_norm": 0.10431494563817978, "learning_rate": 1e-06, "loss": -0.0282, "step": 935 }, { "clip_ratio/high_max": 0.0031442917825188488, "clip_ratio/high_mean": 0.0013375504713621922, "clip_ratio/low_mean": 0.0010702581530495081, "clip_ratio/low_min": 1.628452264412772e-05, "clip_ratio/region_mean": 0.002407808569842018, "epoch": 14.373177842565598, "grad_norm": 0.1322004199028015, "learning_rate": 1e-06, "loss": -0.0572, "step": 936 }, { "clip_ratio/high_max": 0.0028658176597673446, "clip_ratio/high_mean": 0.001189300099213142, "clip_ratio/low_mean": 0.0010691876595956273, "clip_ratio/low_min": 9.669288556324318e-05, "clip_ratio/region_mean": 0.0022584877515328117, "epoch": 14.382507288629737, "grad_norm": 0.13608138263225555, "learning_rate": 1e-06, "loss": -0.0288, "step": 937 }, { "clip_ratio/high_max": 0.0023503157062805258, "clip_ratio/high_mean": 0.0009750783065101132, "clip_ratio/low_mean": 0.001167000493296655, "clip_ratio/low_min": 8.580875601182925e-05, "clip_ratio/region_mean": 0.002142078836186556, "epoch": 14.391836734693877, "grad_norm": 0.12495539337396622, "learning_rate": 1e-06, "loss": 0.0133, "step": 938 }, { "clip_ratio/high_max": 0.0022415608109440655, "clip_ratio/high_mean": 0.0009491406817687675, "clip_ratio/low_mean": 0.0011565360055101337, "clip_ratio/low_min": 5.4371883379644714e-05, "clip_ratio/region_mean": 0.0021056766563560814, "epoch": 14.401166180758018, "grad_norm": 0.1135687455534935, "learning_rate": 1e-06, "loss": 0.0051, "step": 939 }, { "clip_ratio/high_max": 0.002703091755392961, "clip_ratio/high_mean": 0.0010301889396941988, "clip_ratio/low_mean": 0.0012471113259380218, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002277300249261316, "epoch": 14.410495626822158, "grad_norm": 0.12958236038684845, "learning_rate": 1e-06, "loss": 0.0285, "step": 940 }, { "clip_ratio/high_max": 0.0025375607510795817, "clip_ratio/high_mean": 0.001088365101168165, "clip_ratio/low_mean": 0.0009787153721845243, "clip_ratio/low_min": 3.8747675716876984e-05, "clip_ratio/region_mean": 0.0020670805097324774, "epoch": 14.419825072886297, "grad_norm": 9.442768096923828, "learning_rate": 1e-06, "loss": 0.0111, "step": 941 }, { "clip_ratio/high_max": 0.0030651435008621775, "clip_ratio/high_mean": 0.0013379396332311444, "clip_ratio/low_mean": 0.0011468818338471465, "clip_ratio/low_min": 2.466942896717228e-05, "clip_ratio/region_mean": 0.0024848214889061637, "epoch": 14.429154518950437, "grad_norm": 0.13983182609081268, "learning_rate": 1e-06, "loss": -0.0009, "step": 942 }, { "clip_ratio/high_max": 0.0029531323016271926, "clip_ratio/high_mean": 0.0011889290435647126, "clip_ratio/low_mean": 0.0010633777219482, "clip_ratio/low_min": 6.785496589145623e-05, "clip_ratio/region_mean": 0.002252306730952114, "epoch": 14.438483965014576, "grad_norm": 0.14745375514030457, "learning_rate": 1e-06, "loss": -0.0342, "step": 943 }, { "clip_ratio/high_max": 0.0028511406635516323, "clip_ratio/high_mean": 0.0011925216422241647, "clip_ratio/low_mean": 0.0010012209040723974, "clip_ratio/low_min": 7.85820320743369e-05, "clip_ratio/region_mean": 0.0021937425626674667, "epoch": 14.447813411078718, "grad_norm": 0.12538109719753265, "learning_rate": 1e-06, "loss": -0.0538, "step": 944 }, { "clip_ratio/high_max": 0.002897155274695251, "clip_ratio/high_mean": 0.0012587637884280412, "clip_ratio/low_mean": 0.0012891206752101425, "clip_ratio/low_min": 0.00025955435557989404, "clip_ratio/region_mean": 0.00254788450547494, "epoch": 14.457142857142857, "grad_norm": 0.13619297742843628, "learning_rate": 1e-06, "loss": 0.0001, "step": 945 }, { "clip_ratio/high_max": 0.0027963031388935633, "clip_ratio/high_mean": 0.0012036244552291464, "clip_ratio/low_mean": 0.0012208813459437806, "clip_ratio/low_min": 1.8565275240689516e-05, "clip_ratio/region_mean": 0.002424505779345054, "epoch": 14.466472303206997, "grad_norm": 0.12614497542381287, "learning_rate": 1e-06, "loss": -0.0217, "step": 946 }, { "clip_ratio/high_max": 0.0031505838414886966, "clip_ratio/high_mean": 0.0012830314590246417, "clip_ratio/low_mean": 0.0013210248434916139, "clip_ratio/low_min": 0.00010843661584658548, "clip_ratio/region_mean": 0.00260405625886051, "epoch": 14.475801749271136, "grad_norm": 0.11964280903339386, "learning_rate": 1e-06, "loss": -0.0396, "step": 947 }, { "clip_ratio/high_max": 0.0026417123517603613, "clip_ratio/high_mean": 0.0011750264529837295, "clip_ratio/low_mean": 0.0011886841311934404, "clip_ratio/low_min": 1.3363267498789355e-05, "clip_ratio/region_mean": 0.0023637106132810004, "epoch": 14.485131195335278, "grad_norm": 0.11716482788324356, "learning_rate": 1e-06, "loss": -0.0256, "step": 948 }, { "clip_ratio/high_max": 0.003004811624123249, "clip_ratio/high_mean": 0.001232846869243076, "clip_ratio/low_mean": 0.0012912543406855548, "clip_ratio/low_min": 2.8191248929942958e-05, "clip_ratio/region_mean": 0.002524101219023578, "epoch": 14.494460641399417, "grad_norm": 0.11609780788421631, "learning_rate": 1e-06, "loss": -0.0191, "step": 949 }, { "clip_ratio/high_max": 0.0024833397910697386, "clip_ratio/high_mean": 0.0010518038980080746, "clip_ratio/low_mean": 0.0014151472623780137, "clip_ratio/low_min": 0.00021751447457063477, "clip_ratio/region_mean": 0.0024669511258252896, "epoch": 14.503790087463557, "grad_norm": 0.14569492638111115, "learning_rate": 1e-06, "loss": 0.0324, "step": 950 }, { "clip_ratio/high_max": 0.003082641000219155, "clip_ratio/high_mean": 0.0012449124187696725, "clip_ratio/low_mean": 0.0013270887320686597, "clip_ratio/low_min": 0.00018013046610576566, "clip_ratio/region_mean": 0.002572001176304184, "epoch": 14.513119533527696, "grad_norm": 0.13831104338169098, "learning_rate": 1e-06, "loss": -0.0115, "step": 951 }, { "clip_ratio/high_max": 0.002846763440174982, "clip_ratio/high_mean": 0.0013010627662879415, "clip_ratio/low_mean": 0.0013778409720544005, "clip_ratio/low_min": 0.00023421824880642816, "clip_ratio/region_mean": 0.002678903802006971, "epoch": 14.522448979591836, "grad_norm": 0.12651324272155762, "learning_rate": 1e-06, "loss": -0.0168, "step": 952 }, { "clip_ratio/high_max": 0.0027706861146725714, "clip_ratio/high_mean": 0.0012503973157436121, "clip_ratio/low_mean": 0.001177241669211071, "clip_ratio/low_min": 1.3941556971985847e-05, "clip_ratio/region_mean": 0.0024276388867292553, "epoch": 14.531778425655977, "grad_norm": 0.11958511173725128, "learning_rate": 1e-06, "loss": -0.0616, "step": 953 }, { "clip_ratio/high_max": 0.0027408288879087195, "clip_ratio/high_mean": 0.0011930600921914447, "clip_ratio/low_mean": 0.0012990522118343506, "clip_ratio/low_min": 0.0001541691890452057, "clip_ratio/region_mean": 0.002492112311301753, "epoch": 14.541107871720117, "grad_norm": 0.13056538999080658, "learning_rate": 1e-06, "loss": 0.0033, "step": 954 }, { "clip_ratio/high_max": 0.0033028795442078263, "clip_ratio/high_mean": 0.0012961380343767814, "clip_ratio/low_mean": 0.0013840430256095715, "clip_ratio/low_min": 3.1748090805194806e-05, "clip_ratio/region_mean": 0.0026801810672623105, "epoch": 14.550437317784256, "grad_norm": 0.1939610093832016, "learning_rate": 1e-06, "loss": -0.0077, "step": 955 }, { "clip_ratio/high_max": 0.0032667394043528475, "clip_ratio/high_mean": 0.0013424872740870342, "clip_ratio/low_mean": 0.0012122152184019797, "clip_ratio/low_min": 5.3465810196939856e-05, "clip_ratio/region_mean": 0.002554702492489014, "epoch": 14.559766763848396, "grad_norm": 0.13248389959335327, "learning_rate": 1e-06, "loss": -0.047, "step": 956 }, { "clip_ratio/high_max": 0.0028414863991201855, "clip_ratio/high_mean": 0.0013341539415705483, "clip_ratio/low_mean": 0.0012563679883896839, "clip_ratio/low_min": 0.00010780977936519776, "clip_ratio/region_mean": 0.0025905219445121475, "epoch": 14.569096209912537, "grad_norm": 0.11197350174188614, "learning_rate": 1e-06, "loss": -0.0341, "step": 957 }, { "clip_ratio/high_max": 0.0026658251917979214, "clip_ratio/high_mean": 0.0011300213773210999, "clip_ratio/low_mean": 0.001313895558268996, "clip_ratio/low_min": 8.55326934470213e-05, "clip_ratio/region_mean": 0.0024439169574179687, "epoch": 14.578425655976677, "grad_norm": 0.1402246505022049, "learning_rate": 1e-06, "loss": -0.0009, "step": 958 }, { "clip_ratio/high_max": 0.002564258931670338, "clip_ratio/high_mean": 0.0010224614070466487, "clip_ratio/low_mean": 0.0013112426786392462, "clip_ratio/low_min": 3.2391810236731544e-05, "clip_ratio/region_mean": 0.0023337041202466935, "epoch": 14.587755102040816, "grad_norm": 0.125115767121315, "learning_rate": 1e-06, "loss": 0.0198, "step": 959 }, { "clip_ratio/high_max": 0.003075114087550901, "clip_ratio/high_mean": 0.0013087887800793396, "clip_ratio/low_mean": 0.001271657447432517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002580446242063772, "epoch": 14.597084548104956, "grad_norm": 0.1357250213623047, "learning_rate": 1e-06, "loss": -0.0224, "step": 960 }, { "epoch": 14.597084548104956, "step": 960, "total_flos": 0.0, "train_loss": -0.0033660151399809973, "train_runtime": 58553.53, "train_samples_per_second": 24.484, "train_steps_per_second": 0.027 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 553113682, "num_train_epochs": 15, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }