{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.895626822157434, "eval_steps": 500, "global_step": 1517, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013462611607142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 598.06640625, "completions/mean_terminated_length": 550.3324584960938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.009329446064139942, "grad_norm": 0.15793108940124512, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 9222000.0, "reward": 0.490513414144516, "reward_std": 0.25811317563056946, "rewards/simpleverify_reward/mean": 0.4905133843421936, "rewards/simpleverify_reward/std": 0.49992743134498596, "step": 1 }, { "clip_ratio/high_max": 0.0024601568657089956, "clip_ratio/high_mean": 0.0010448666562297149, "clip_ratio/low_mean": 0.000644696350718732, "clip_ratio/low_min": 8.820428593026008e-05, "clip_ratio/region_mean": 0.0016895629960345104, "epoch": 0.018658892128279883, "grad_norm": 0.15137478709220886, "learning_rate": 1e-06, "loss": -0.0108, "step": 2 }, { "clip_ratio/high_max": 0.0021288376301527023, "clip_ratio/high_mean": 0.0010039467633760069, "clip_ratio/low_mean": 0.0008111417137115495, "clip_ratio/low_min": 7.379240560112521e-05, "clip_ratio/region_mean": 0.001815088457078673, "epoch": 0.027988338192419825, "grad_norm": 0.12634281814098358, "learning_rate": 1e-06, "loss": -0.0041, "step": 3 }, { "clip_ratio/high_max": 0.0026518664235481992, "clip_ratio/high_mean": 0.0011118977236037608, "clip_ratio/low_mean": 0.0008823053431115113, "clip_ratio/low_min": 7.700139030930586e-05, "clip_ratio/region_mean": 0.001994203048525378, "epoch": 0.037317784256559766, "grad_norm": 0.1273786425590515, "learning_rate": 1e-06, "loss": -0.0266, "step": 4 }, { "clip_ratio/high_max": 0.002308279996213969, "clip_ratio/high_mean": 0.0010572430128377164, "clip_ratio/low_mean": 0.0009346202677988913, "clip_ratio/low_min": 0.0001029746117637842, "clip_ratio/region_mean": 0.0019918632970075123, "epoch": 0.04664723032069971, "grad_norm": 0.986992359161377, "learning_rate": 1e-06, "loss": 0.0027, "step": 5 }, { "clip_ratio/high_max": 0.002323457505553961, "clip_ratio/high_mean": 0.0011018939112545922, "clip_ratio/low_mean": 0.001321036252193153, "clip_ratio/low_min": 0.00021645989545504563, "clip_ratio/region_mean": 0.002422930207103491, "epoch": 0.05597667638483965, "grad_norm": 0.142598956823349, "learning_rate": 1e-06, "loss": 0.0196, "step": 6 }, { "clip_ratio/high_max": 0.0028667169317486696, "clip_ratio/high_mean": 0.0012652547993639018, "clip_ratio/low_mean": 0.00149460726606776, "clip_ratio/low_min": 0.0002861733537429245, "clip_ratio/region_mean": 0.0027598620872595347, "epoch": 0.0653061224489796, "grad_norm": 0.13298052549362183, "learning_rate": 1e-06, "loss": 0.0273, "step": 7 }, { "clip_ratio/high_max": 0.0028181808156659827, "clip_ratio/high_mean": 0.001236196381796617, "clip_ratio/low_mean": 0.0014091433258727193, "clip_ratio/low_min": 0.000168387468875153, "clip_ratio/region_mean": 0.002645339773152955, "epoch": 0.07463556851311953, "grad_norm": 0.14220747351646423, "learning_rate": 1e-06, "loss": 0.0423, "step": 8 }, { "clip_ratio/high_max": 0.0031344193193945102, "clip_ratio/high_mean": 0.0013968820276204497, "clip_ratio/low_mean": 0.0013609863053716253, "clip_ratio/low_min": 7.502383687096881e-05, "clip_ratio/region_mean": 0.0027578683439060114, "epoch": 0.08396501457725948, "grad_norm": 0.12145263701677322, "learning_rate": 1e-06, "loss": -0.0205, "step": 9 }, { "clip_ratio/high_max": 0.0031802451267139986, "clip_ratio/high_mean": 0.0013526883085432928, "clip_ratio/low_mean": 0.001444492594600888, "clip_ratio/low_min": 0.00025770642514544306, "clip_ratio/region_mean": 0.0027971808740403503, "epoch": 0.09329446064139942, "grad_norm": 0.15231800079345703, "learning_rate": 1e-06, "loss": 0.0194, "step": 10 }, { "clip_ratio/high_max": 0.0024487841365044005, "clip_ratio/high_mean": 0.001158136743470095, "clip_ratio/low_mean": 0.0014681081702292431, "clip_ratio/low_min": 0.0003436464539845474, "clip_ratio/region_mean": 0.002626244895509444, "epoch": 0.10262390670553936, "grad_norm": 0.12963512539863586, "learning_rate": 1e-06, "loss": 0.0282, "step": 11 }, { "clip_ratio/high_max": 0.0025585607727407478, "clip_ratio/high_mean": 0.0011575238713703584, "clip_ratio/low_mean": 0.001335229924734449, "clip_ratio/low_min": 0.00023444405178452143, "clip_ratio/region_mean": 0.0024927538179326802, "epoch": 0.1119533527696793, "grad_norm": 0.13105177879333496, "learning_rate": 1e-06, "loss": 0.0343, "step": 12 }, { "clip_ratio/high_max": 0.00249174700002186, "clip_ratio/high_mean": 0.0012727191915473668, "clip_ratio/low_mean": 0.0012859527560067363, "clip_ratio/low_min": 7.343783363467082e-05, "clip_ratio/region_mean": 0.002558671993028838, "epoch": 0.12128279883381925, "grad_norm": 0.13071057200431824, "learning_rate": 1e-06, "loss": -0.0099, "step": 13 }, { "clip_ratio/high_max": 0.00304969992430415, "clip_ratio/high_mean": 0.001370076795865316, "clip_ratio/low_mean": 0.001151607664723997, "clip_ratio/low_min": 0.00020763497559528332, "clip_ratio/region_mean": 0.0025216845024260692, "epoch": 0.1306122448979592, "grad_norm": 0.13296616077423096, "learning_rate": 1e-06, "loss": -0.0619, "step": 14 }, { "clip_ratio/high_max": 0.002731779204623308, "clip_ratio/high_mean": 0.0013025071948504774, "clip_ratio/low_mean": 0.0010966669233312132, "clip_ratio/low_min": 0.00017717354603519198, "clip_ratio/region_mean": 0.0023991741763893515, "epoch": 0.13994169096209913, "grad_norm": 0.13582149147987366, "learning_rate": 1e-06, "loss": -0.0866, "step": 15 }, { "clip_ratio/high_max": 0.002916733290476259, "clip_ratio/high_mean": 0.0012729555855912622, "clip_ratio/low_mean": 0.001118361709814053, "clip_ratio/low_min": 0.00016511702142452123, "clip_ratio/region_mean": 0.0023913173281471245, "epoch": 0.14927113702623906, "grad_norm": 0.13542574644088745, "learning_rate": 1e-06, "loss": -0.0292, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0126953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 609.845947265625, "completions/mean_terminated_length": 565.0189819335938, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.158600583090379, "grad_norm": 0.12498702108860016, "learning_rate": 1e-06, "loss": 0.0494, "num_tokens": 18628863.0, "reward": 0.5178571939468384, "reward_std": 0.24615387618541718, "rewards/simpleverify_reward/mean": 0.5178571343421936, "rewards/simpleverify_reward/std": 0.4996984302997589, "step": 17 }, { "clip_ratio/high_max": 0.002159903437132016, "clip_ratio/high_mean": 0.0009694940727058565, "clip_ratio/low_mean": 0.0006389547415892594, "clip_ratio/low_min": 2.472402593411971e-05, "clip_ratio/region_mean": 0.0016084488743217662, "epoch": 0.16793002915451896, "grad_norm": 0.13060401380062103, "learning_rate": 1e-06, "loss": 0.0065, "step": 18 }, { "clip_ratio/high_max": 0.002291696539032273, "clip_ratio/high_mean": 0.0010087170849146787, "clip_ratio/low_mean": 0.0006390835678757867, "clip_ratio/low_min": 5.4300301599141676e-05, "clip_ratio/region_mean": 0.0016478006800753064, "epoch": 0.1772594752186589, "grad_norm": 0.13525819778442383, "learning_rate": 1e-06, "loss": 0.0037, "step": 19 }, { "clip_ratio/high_max": 0.0024093673928291537, "clip_ratio/high_mean": 0.0010473035545146558, "clip_ratio/low_mean": 0.0005353170690796105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015826206363271922, "epoch": 0.18658892128279883, "grad_norm": 0.14064635336399078, "learning_rate": 1e-06, "loss": -0.0044, "step": 20 }, { "clip_ratio/high_max": 0.0022806927518104203, "clip_ratio/high_mean": 0.0010065789574582595, "clip_ratio/low_mean": 0.0007657364203623729, "clip_ratio/low_min": 0.00010145609030587366, "clip_ratio/region_mean": 0.0017723153905535582, "epoch": 0.19591836734693877, "grad_norm": 0.14254872500896454, "learning_rate": 1e-06, "loss": 0.0415, "step": 21 }, { "clip_ratio/high_max": 0.0022462166416516993, "clip_ratio/high_mean": 0.001022220039885724, "clip_ratio/low_mean": 0.0006201261439855443, "clip_ratio/low_min": 1.4578959962818772e-05, "clip_ratio/region_mean": 0.0016423461638623849, "epoch": 0.20524781341107873, "grad_norm": 0.15892857313156128, "learning_rate": 1e-06, "loss": -0.0321, "step": 22 }, { "clip_ratio/high_max": 0.0025781611184356734, "clip_ratio/high_mean": 0.001029186838422902, "clip_ratio/low_mean": 0.0007028659128991421, "clip_ratio/low_min": 4.412868383951718e-05, "clip_ratio/region_mean": 0.0017320527622359805, "epoch": 0.21457725947521866, "grad_norm": 0.13623952865600586, "learning_rate": 1e-06, "loss": -0.014, "step": 23 }, { "clip_ratio/high_max": 0.0025915548285411205, "clip_ratio/high_mean": 0.0011983362355749705, "clip_ratio/low_mean": 0.0007230298497233889, "clip_ratio/low_min": 8.303965387312928e-05, "clip_ratio/region_mean": 0.0019213660707464442, "epoch": 0.2239067055393586, "grad_norm": 0.1398254632949829, "learning_rate": 1e-06, "loss": -0.0364, "step": 24 }, { "clip_ratio/high_max": 0.002120888137142174, "clip_ratio/high_mean": 0.0010612709156703204, "clip_ratio/low_mean": 0.0008509352992405184, "clip_ratio/low_min": 5.7444940466666594e-05, "clip_ratio/region_mean": 0.0019122062367387116, "epoch": 0.23323615160349853, "grad_norm": 0.11491367965936661, "learning_rate": 1e-06, "loss": -0.0222, "step": 25 }, { "clip_ratio/high_max": 0.002213675354141742, "clip_ratio/high_mean": 0.0010549918479227927, "clip_ratio/low_mean": 0.0008460203043796355, "clip_ratio/low_min": 3.555983948899666e-05, "clip_ratio/region_mean": 0.0019010121395695023, "epoch": 0.2425655976676385, "grad_norm": 0.11270330101251602, "learning_rate": 1e-06, "loss": -0.0098, "step": 26 }, { "clip_ratio/high_max": 0.002299379884789232, "clip_ratio/high_mean": 0.0010357006613048725, "clip_ratio/low_mean": 0.0010082713051815517, "clip_ratio/low_min": 0.00012026631429762347, "clip_ratio/region_mean": 0.0020439720028662123, "epoch": 0.2518950437317784, "grad_norm": 0.12197400629520416, "learning_rate": 1e-06, "loss": -0.0059, "step": 27 }, { "clip_ratio/high_max": 0.002346960944123566, "clip_ratio/high_mean": 0.0010146400272788014, "clip_ratio/low_mean": 0.0011778413245338015, "clip_ratio/low_min": 0.00013644692899106303, "clip_ratio/region_mean": 0.0021924813554505818, "epoch": 0.2612244897959184, "grad_norm": 0.12335337698459625, "learning_rate": 1e-06, "loss": 0.0122, "step": 28 }, { "clip_ratio/high_max": 0.002666456341103185, "clip_ratio/high_mean": 0.0012204042650409974, "clip_ratio/low_mean": 0.0012207690670038573, "clip_ratio/low_min": 0.00020913442676828708, "clip_ratio/region_mean": 0.002441173324768897, "epoch": 0.2705539358600583, "grad_norm": 0.13716165721416473, "learning_rate": 1e-06, "loss": -0.0082, "step": 29 }, { "clip_ratio/high_max": 0.0027250023194937967, "clip_ratio/high_mean": 0.0012492801579355728, "clip_ratio/low_mean": 0.001126157485487056, "clip_ratio/low_min": 7.836080112610944e-05, "clip_ratio/region_mean": 0.00237543760886183, "epoch": 0.27988338192419826, "grad_norm": 0.1165609210729599, "learning_rate": 1e-06, "loss": -0.0391, "step": 30 }, { "clip_ratio/high_max": 0.002974663169879932, "clip_ratio/high_mean": 0.0012166533961135428, "clip_ratio/low_mean": 0.0015006653557065874, "clip_ratio/low_min": 9.725085692480206e-05, "clip_ratio/region_mean": 0.0027173188645974733, "epoch": 0.2892128279883382, "grad_norm": 0.14555908739566803, "learning_rate": 1e-06, "loss": 0.0329, "step": 31 }, { "clip_ratio/high_max": 0.0023905233611003496, "clip_ratio/high_mean": 0.0011548375914571807, "clip_ratio/low_mean": 0.0013143258256604895, "clip_ratio/low_min": 0.0001732370192257804, "clip_ratio/region_mean": 0.002469163438945543, "epoch": 0.29854227405247813, "grad_norm": 0.12721918523311615, "learning_rate": 1e-06, "loss": -0.0351, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01416015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 613.6419067382812, "completions/mean_terminated_length": 563.6228637695312, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.30787172011661806, "grad_norm": 0.13368375599384308, "learning_rate": 1e-06, "loss": -0.0363, "num_tokens": 28031929.0, "reward": 0.5321568250656128, "reward_std": 0.23263370990753174, "rewards/simpleverify_reward/mean": 0.5321568250656128, "rewards/simpleverify_reward/std": 0.4989822208881378, "step": 33 }, { "clip_ratio/high_max": 0.002626552704896312, "clip_ratio/high_mean": 0.0010393682387075387, "clip_ratio/low_mean": 0.0005832021251990227, "clip_ratio/low_min": 4.563502898236038e-05, "clip_ratio/region_mean": 0.0016225703584495932, "epoch": 0.317201166180758, "grad_norm": 0.12965810298919678, "learning_rate": 1e-06, "loss": 0.0099, "step": 34 }, { "clip_ratio/high_max": 0.002326758265553508, "clip_ratio/high_mean": 0.0010395546632935293, "clip_ratio/low_mean": 0.0006204050432643271, "clip_ratio/low_min": 4.566454208543291e-05, "clip_ratio/region_mean": 0.001659959729295224, "epoch": 0.32653061224489793, "grad_norm": 0.12623383104801178, "learning_rate": 1e-06, "loss": -0.0499, "step": 35 }, { "clip_ratio/high_max": 0.002126647093973588, "clip_ratio/high_mean": 0.0008341245229530614, "clip_ratio/low_mean": 0.0007388288504444063, "clip_ratio/low_min": 0.00010162334001506679, "clip_ratio/region_mean": 0.0015729533770354465, "epoch": 0.3358600583090379, "grad_norm": 0.11803451180458069, "learning_rate": 1e-06, "loss": 0.0661, "step": 36 }, { "clip_ratio/high_max": 0.002318061469850363, "clip_ratio/high_mean": 0.0009527234124107054, "clip_ratio/low_mean": 0.000773030158597976, "clip_ratio/low_min": 0.00013140742794348625, "clip_ratio/region_mean": 0.0017257536019315012, "epoch": 0.34518950437317786, "grad_norm": 0.13941334187984467, "learning_rate": 1e-06, "loss": -0.0064, "step": 37 }, { "clip_ratio/high_max": 0.002032895470620133, "clip_ratio/high_mean": 0.000978511072389665, "clip_ratio/low_mean": 0.0006805238408560399, "clip_ratio/low_min": 8.213186902139569e-05, "clip_ratio/region_mean": 0.0016590349187026732, "epoch": 0.3545189504373178, "grad_norm": 0.13098467886447906, "learning_rate": 1e-06, "loss": -0.0028, "step": 38 }, { "clip_ratio/high_max": 0.002181304014811758, "clip_ratio/high_mean": 0.0009898940770654008, "clip_ratio/low_mean": 0.0008124434061755892, "clip_ratio/low_min": 5.668350877385819e-05, "clip_ratio/region_mean": 0.0018023374796030112, "epoch": 0.3638483965014577, "grad_norm": 0.1251867562532425, "learning_rate": 1e-06, "loss": 0.0189, "step": 39 }, { "clip_ratio/high_max": 0.0022022799457772635, "clip_ratio/high_mean": 0.0010273077459714841, "clip_ratio/low_mean": 0.0007001176927587949, "clip_ratio/low_min": 7.806645044183824e-06, "clip_ratio/region_mean": 0.001727425493299961, "epoch": 0.37317784256559766, "grad_norm": 0.12788569927215576, "learning_rate": 1e-06, "loss": -0.0341, "step": 40 }, { "clip_ratio/high_max": 0.0022233412601053715, "clip_ratio/high_mean": 0.0009245502969861263, "clip_ratio/low_mean": 0.0008491765183862299, "clip_ratio/low_min": 7.619911320944084e-05, "clip_ratio/region_mean": 0.0017737267844495364, "epoch": 0.3825072886297376, "grad_norm": 0.1204029992222786, "learning_rate": 1e-06, "loss": -0.0219, "step": 41 }, { "clip_ratio/high_max": 0.0025394043695996515, "clip_ratio/high_mean": 0.0012033716411679052, "clip_ratio/low_mean": 0.0009436263462703209, "clip_ratio/low_min": 0.00016213468097703299, "clip_ratio/region_mean": 0.0021469979983521625, "epoch": 0.39183673469387753, "grad_norm": 0.13179802894592285, "learning_rate": 1e-06, "loss": 0.0027, "step": 42 }, { "clip_ratio/high_max": 0.0025177651295962278, "clip_ratio/high_mean": 0.0011342557372699957, "clip_ratio/low_mean": 0.0009892123234749306, "clip_ratio/low_min": 8.098565194814e-05, "clip_ratio/region_mean": 0.0021234680243651383, "epoch": 0.40116618075801747, "grad_norm": 0.11540088057518005, "learning_rate": 1e-06, "loss": 0.0105, "step": 43 }, { "clip_ratio/high_max": 0.002883447625208646, "clip_ratio/high_mean": 0.0011544148801476695, "clip_ratio/low_mean": 0.0012134121898270678, "clip_ratio/low_min": 0.00013870753264200175, "clip_ratio/region_mean": 0.002367827059060801, "epoch": 0.41049562682215746, "grad_norm": 0.1349673569202423, "learning_rate": 1e-06, "loss": 0.032, "step": 44 }, { "clip_ratio/high_max": 0.0020178223712719046, "clip_ratio/high_mean": 0.0008869032935763244, "clip_ratio/low_mean": 0.0010751760346465744, "clip_ratio/low_min": 0.00015780563990119845, "clip_ratio/region_mean": 0.001962079346412793, "epoch": 0.4198250728862974, "grad_norm": 0.10952676832675934, "learning_rate": 1e-06, "loss": 0.0278, "step": 45 }, { "clip_ratio/high_max": 0.0022698242028127424, "clip_ratio/high_mean": 0.0010209984957327833, "clip_ratio/low_mean": 0.0011120834824396297, "clip_ratio/low_min": 0.00022615100897382945, "clip_ratio/region_mean": 0.0021330818999558687, "epoch": 0.4291545189504373, "grad_norm": 0.12151277810335159, "learning_rate": 1e-06, "loss": -0.0222, "step": 46 }, { "clip_ratio/high_max": 0.002615006385894958, "clip_ratio/high_mean": 0.0010445823536429089, "clip_ratio/low_mean": 0.0011482937989057973, "clip_ratio/low_min": 7.886492858233396e-05, "clip_ratio/region_mean": 0.0021928760907030664, "epoch": 0.43848396501457726, "grad_norm": 0.13126592338085175, "learning_rate": 1e-06, "loss": 0.002, "step": 47 }, { "clip_ratio/high_max": 0.002351936032937374, "clip_ratio/high_mean": 0.000990244501736015, "clip_ratio/low_mean": 0.0013020757323829457, "clip_ratio/low_min": 0.00025782081775105326, "clip_ratio/region_mean": 0.002292320234118961, "epoch": 0.4478134110787172, "grad_norm": 0.11731404811143875, "learning_rate": 1e-06, "loss": 0.0354, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01708984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3834.0, "completions/mean_length": 604.4925537109375, "completions/mean_terminated_length": 543.7857055664062, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.45714285714285713, "grad_norm": 0.14375391602516174, "learning_rate": 1e-06, "loss": 0.052, "num_tokens": 37094438.0, "reward": 0.5535017251968384, "reward_std": 0.22007183730602264, "rewards/simpleverify_reward/mean": 0.5535016655921936, "rewards/simpleverify_reward/std": 0.4971466660499573, "step": 49 }, { "clip_ratio/high_max": 0.002432263663649792, "clip_ratio/high_mean": 0.001072027798727504, "clip_ratio/low_mean": 0.0005879803611605894, "clip_ratio/low_min": 2.8898682103317697e-05, "clip_ratio/region_mean": 0.0016600081544311251, "epoch": 0.46647230320699706, "grad_norm": 0.14273764193058014, "learning_rate": 1e-06, "loss": 0.0229, "step": 50 }, { "clip_ratio/high_max": 0.002283059038745705, "clip_ratio/high_mean": 0.0008509316612617113, "clip_ratio/low_mean": 0.0006286275620368542, "clip_ratio/low_min": 4.3187021219637245e-05, "clip_ratio/region_mean": 0.0014795592251175549, "epoch": 0.47580174927113705, "grad_norm": 0.13006703555583954, "learning_rate": 1e-06, "loss": 0.0366, "step": 51 }, { "clip_ratio/high_max": 0.001999438914936036, "clip_ratio/high_mean": 0.000839833186546457, "clip_ratio/low_mean": 0.0006693369996355614, "clip_ratio/low_min": 0.0001344475867881556, "clip_ratio/region_mean": 0.001509170175268082, "epoch": 0.485131195335277, "grad_norm": 0.12139897048473358, "learning_rate": 1e-06, "loss": 0.0392, "step": 52 }, { "clip_ratio/high_max": 0.0017305718283751048, "clip_ratio/high_mean": 0.0008292601032735547, "clip_ratio/low_mean": 0.0006995422263571527, "clip_ratio/low_min": 1.3255566955194809e-05, "clip_ratio/region_mean": 0.001528802309621824, "epoch": 0.4944606413994169, "grad_norm": 0.12202150374650955, "learning_rate": 1e-06, "loss": 0.0232, "step": 53 }, { "clip_ratio/high_max": 0.002160885836929083, "clip_ratio/high_mean": 0.0008992203784146113, "clip_ratio/low_mean": 0.0006999922206887277, "clip_ratio/low_min": 5.306989078235347e-05, "clip_ratio/region_mean": 0.0015992125772754662, "epoch": 0.5037900874635568, "grad_norm": 0.11912939697504044, "learning_rate": 1e-06, "loss": 0.0195, "step": 54 }, { "clip_ratio/high_max": 0.0021769391605630517, "clip_ratio/high_mean": 0.0009708462039270671, "clip_ratio/low_mean": 0.0005107549704916892, "clip_ratio/low_min": 3.0613970920967404e-05, "clip_ratio/region_mean": 0.0014816011789662298, "epoch": 0.5131195335276968, "grad_norm": 0.11994564533233643, "learning_rate": 1e-06, "loss": -0.0381, "step": 55 }, { "clip_ratio/high_max": 0.002153600475139683, "clip_ratio/high_mean": 0.0010197883693763288, "clip_ratio/low_mean": 0.0007438701395585667, "clip_ratio/low_min": 5.917981343372958e-05, "clip_ratio/region_mean": 0.0017636585362197366, "epoch": 0.5224489795918368, "grad_norm": 0.11797942966222763, "learning_rate": 1e-06, "loss": 0.0144, "step": 56 }, { "clip_ratio/high_max": 0.0023799854679964483, "clip_ratio/high_mean": 0.0010141728489543311, "clip_ratio/low_mean": 0.000737738291718415, "clip_ratio/low_min": 5.345578210835811e-05, "clip_ratio/region_mean": 0.0017519111497676931, "epoch": 0.5317784256559767, "grad_norm": 0.12943631410598755, "learning_rate": 1e-06, "loss": -0.0234, "step": 57 }, { "clip_ratio/high_max": 0.002137411618605256, "clip_ratio/high_mean": 0.0008898486994439736, "clip_ratio/low_mean": 0.0008838032736093737, "clip_ratio/low_min": 0.00010239726543659344, "clip_ratio/region_mean": 0.0017736519366735592, "epoch": 0.5411078717201167, "grad_norm": 0.10991153866052628, "learning_rate": 1e-06, "loss": 0.0094, "step": 58 }, { "clip_ratio/high_max": 0.002323334338143468, "clip_ratio/high_mean": 0.0009539089151076041, "clip_ratio/low_mean": 0.0010366646565671545, "clip_ratio/low_min": 9.5318182502524e-05, "clip_ratio/region_mean": 0.0019905735971406102, "epoch": 0.5504373177842565, "grad_norm": 0.12887339293956757, "learning_rate": 1e-06, "loss": 0.0342, "step": 59 }, { "clip_ratio/high_max": 0.0025891067925840616, "clip_ratio/high_mean": 0.0011386138357920572, "clip_ratio/low_mean": 0.0008775034530117409, "clip_ratio/low_min": 6.345092060655588e-05, "clip_ratio/region_mean": 0.0020161172869848087, "epoch": 0.5597667638483965, "grad_norm": 0.13606473803520203, "learning_rate": 1e-06, "loss": -0.0216, "step": 60 }, { "clip_ratio/high_max": 0.002085479754896369, "clip_ratio/high_mean": 0.0010832430853042752, "clip_ratio/low_mean": 0.000905084700207226, "clip_ratio/low_min": 3.7960113331791945e-05, "clip_ratio/region_mean": 0.0019883278000634164, "epoch": 0.5690962099125364, "grad_norm": 0.13461802899837494, "learning_rate": 1e-06, "loss": -0.0425, "step": 61 }, { "clip_ratio/high_max": 0.0019989337597507983, "clip_ratio/high_mean": 0.00099940512154717, "clip_ratio/low_mean": 0.001074762030839338, "clip_ratio/low_min": 0.00011984079355897848, "clip_ratio/region_mean": 0.0020741671469295397, "epoch": 0.5784256559766764, "grad_norm": 0.12812580168247223, "learning_rate": 1e-06, "loss": 0.0191, "step": 62 }, { "clip_ratio/high_max": 0.0022899679897818714, "clip_ratio/high_mean": 0.0010615860228426754, "clip_ratio/low_mean": 0.001165059376944555, "clip_ratio/low_min": 7.615557751705637e-05, "clip_ratio/region_mean": 0.002226645410701167, "epoch": 0.5877551020408164, "grad_norm": 0.12896870076656342, "learning_rate": 1e-06, "loss": -0.0162, "step": 63 }, { "clip_ratio/high_max": 0.002399393211817369, "clip_ratio/high_mean": 0.0010834128697752021, "clip_ratio/low_mean": 0.0010038851123681525, "clip_ratio/low_min": 6.874884547869442e-05, "clip_ratio/region_mean": 0.002087298002152238, "epoch": 0.5970845481049563, "grad_norm": 0.13988251984119415, "learning_rate": 1e-06, "loss": -0.0338, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01416015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 619.6920166015625, "completions/mean_terminated_length": 569.7598876953125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.6064139941690962, "grad_norm": 0.10815761238336563, "learning_rate": 1e-06, "loss": -0.018, "num_tokens": 46621254.0, "reward": 0.5411551594734192, "reward_std": 0.21858219802379608, "rewards/simpleverify_reward/mean": 0.5411551594734192, "rewards/simpleverify_reward/std": 0.4983207583427429, "step": 65 }, { "clip_ratio/high_max": 0.0022680325419059955, "clip_ratio/high_mean": 0.0009119804926740471, "clip_ratio/low_mean": 0.0005281932499201503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014401737389562186, "epoch": 0.6157434402332361, "grad_norm": 0.12402459979057312, "learning_rate": 1e-06, "loss": -0.0411, "step": 66 }, { "clip_ratio/high_max": 0.0019378016440896317, "clip_ratio/high_mean": 0.0009241701554856263, "clip_ratio/low_mean": 0.0006772949582227739, "clip_ratio/low_min": 8.953528504207497e-05, "clip_ratio/region_mean": 0.001601465126441326, "epoch": 0.6250728862973761, "grad_norm": 0.11827189475297928, "learning_rate": 1e-06, "loss": 0.009, "step": 67 }, { "clip_ratio/high_max": 0.002131821005605161, "clip_ratio/high_mean": 0.0009338440177089069, "clip_ratio/low_mean": 0.0006258384710235987, "clip_ratio/low_min": 4.0391404581896495e-05, "clip_ratio/region_mean": 0.0015596825214743149, "epoch": 0.634402332361516, "grad_norm": 0.1251201331615448, "learning_rate": 1e-06, "loss": -0.0059, "step": 68 }, { "clip_ratio/high_max": 0.0021377311313699465, "clip_ratio/high_mean": 0.0009294220199080883, "clip_ratio/low_mean": 0.0006805675857322058, "clip_ratio/low_min": 2.8755463063134812e-05, "clip_ratio/region_mean": 0.0016099896092782728, "epoch": 0.643731778425656, "grad_norm": 0.12024405598640442, "learning_rate": 1e-06, "loss": 0.0318, "step": 69 }, { "clip_ratio/high_max": 0.002136308426997857, "clip_ratio/high_mean": 0.0009945343499566661, "clip_ratio/low_mean": 0.0006215885287019773, "clip_ratio/low_min": 2.4768064577074256e-05, "clip_ratio/region_mean": 0.0016161228413693607, "epoch": 0.6530612244897959, "grad_norm": 0.12535366415977478, "learning_rate": 1e-06, "loss": -0.0298, "step": 70 }, { "clip_ratio/high_max": 0.0018523874168749899, "clip_ratio/high_mean": 0.0008041732126002898, "clip_ratio/low_mean": 0.0007256802418851294, "clip_ratio/low_min": 5.363798754842719e-05, "clip_ratio/region_mean": 0.0015298534563044086, "epoch": 0.6623906705539359, "grad_norm": 0.1205780953168869, "learning_rate": 1e-06, "loss": 0.0562, "step": 71 }, { "clip_ratio/high_max": 0.0020143017391092144, "clip_ratio/high_mean": 0.0008350821899512084, "clip_ratio/low_mean": 0.0008935764763009502, "clip_ratio/low_min": 0.00014684074540127767, "clip_ratio/region_mean": 0.0017286586444242857, "epoch": 0.6717201166180758, "grad_norm": 0.11822529137134552, "learning_rate": 1e-06, "loss": 0.009, "step": 72 }, { "clip_ratio/high_max": 0.002461064330418594, "clip_ratio/high_mean": 0.001023170607368229, "clip_ratio/low_mean": 0.0008469953918393003, "clip_ratio/low_min": 8.23627724457765e-05, "clip_ratio/region_mean": 0.0018701660155784339, "epoch": 0.6810495626822157, "grad_norm": 0.12145008146762848, "learning_rate": 1e-06, "loss": 0.0023, "step": 73 }, { "clip_ratio/high_max": 0.0022504045700770803, "clip_ratio/high_mean": 0.0009958397749869619, "clip_ratio/low_mean": 0.0008937774764490314, "clip_ratio/low_min": 8.235894529207144e-05, "clip_ratio/region_mean": 0.0018896172623499297, "epoch": 0.6903790087463557, "grad_norm": 0.11873778700828552, "learning_rate": 1e-06, "loss": 0.0056, "step": 74 }, { "clip_ratio/high_max": 0.002351994226046372, "clip_ratio/high_mean": 0.0009669927167124115, "clip_ratio/low_mean": 0.0009786350146896439, "clip_ratio/low_min": 0.00012304711526667234, "clip_ratio/region_mean": 0.0019456277805147693, "epoch": 0.6997084548104956, "grad_norm": 0.131026029586792, "learning_rate": 1e-06, "loss": 0.0241, "step": 75 }, { "clip_ratio/high_max": 0.0020935145621479023, "clip_ratio/high_mean": 0.0008502227738063084, "clip_ratio/low_mean": 0.0009927259779942688, "clip_ratio/low_min": 0.00010439355446578702, "clip_ratio/region_mean": 0.0018429487608955242, "epoch": 0.7090379008746356, "grad_norm": 0.12419907003641129, "learning_rate": 1e-06, "loss": 0.0394, "step": 76 }, { "clip_ratio/high_max": 0.0019723946788872126, "clip_ratio/high_mean": 0.0009064568439498544, "clip_ratio/low_mean": 0.0009719163572299294, "clip_ratio/low_min": 9.30518990571727e-05, "clip_ratio/region_mean": 0.0018783731502480805, "epoch": 0.7183673469387755, "grad_norm": 0.11317285895347595, "learning_rate": 1e-06, "loss": -0.0275, "step": 77 }, { "clip_ratio/high_max": 0.0025763329977053218, "clip_ratio/high_mean": 0.0010082886001328006, "clip_ratio/low_mean": 0.0010902876347245183, "clip_ratio/low_min": 0.0001640887130633928, "clip_ratio/region_mean": 0.0020985762821510434, "epoch": 0.7276967930029155, "grad_norm": 0.12166618555784225, "learning_rate": 1e-06, "loss": 0.0072, "step": 78 }, { "clip_ratio/high_max": 0.0021272204030537978, "clip_ratio/high_mean": 0.0009146352149400627, "clip_ratio/low_mean": 0.0010314851660950808, "clip_ratio/low_min": 9.942216092895251e-05, "clip_ratio/region_mean": 0.001946120333741419, "epoch": 0.7370262390670554, "grad_norm": 0.11663459241390228, "learning_rate": 1e-06, "loss": -0.0202, "step": 79 }, { "clip_ratio/high_max": 0.0019168857979821041, "clip_ratio/high_mean": 0.0008468138476018794, "clip_ratio/low_mean": 0.001226578406203771, "clip_ratio/low_min": 0.00011152384104207158, "clip_ratio/region_mean": 0.002073392221063841, "epoch": 0.7463556851311953, "grad_norm": 0.12021946161985397, "learning_rate": 1e-06, "loss": 0.0318, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012486049107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 610.6770629882812, "completions/mean_terminated_length": 566.60888671875, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.7556851311953353, "grad_norm": 0.1233874037861824, "learning_rate": 1e-06, "loss": -0.0136, "num_tokens": 56060560.0, "reward": 0.5696150064468384, "reward_std": 0.209486186504364, "rewards/simpleverify_reward/mean": 0.5696149468421936, "rewards/simpleverify_reward/std": 0.49514731764793396, "step": 81 }, { "clip_ratio/high_max": 0.001889405230031116, "clip_ratio/high_mean": 0.0008385150758840609, "clip_ratio/low_mean": 0.0005991889865981648, "clip_ratio/low_min": 6.593239959329367e-05, "clip_ratio/region_mean": 0.0014377040897670668, "epoch": 0.7650145772594752, "grad_norm": 0.12283451855182648, "learning_rate": 1e-06, "loss": -0.0065, "step": 82 }, { "clip_ratio/high_max": 0.0018661120921024121, "clip_ratio/high_mean": 0.0008030472654354526, "clip_ratio/low_mean": 0.0005979592897347175, "clip_ratio/low_min": 2.8768737138307188e-05, "clip_ratio/region_mean": 0.0014010065788170323, "epoch": 0.7743440233236152, "grad_norm": 0.1187792494893074, "learning_rate": 1e-06, "loss": 0.0282, "step": 83 }, { "clip_ratio/high_max": 0.001902804709970951, "clip_ratio/high_mean": 0.0008287325490528019, "clip_ratio/low_mean": 0.0005685124469891889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013972450069559272, "epoch": 0.7836734693877551, "grad_norm": 0.10965719074010849, "learning_rate": 1e-06, "loss": -0.0031, "step": 84 }, { "clip_ratio/high_max": 0.0019044405744352844, "clip_ratio/high_mean": 0.0008736350882827537, "clip_ratio/low_mean": 0.0006812759675085545, "clip_ratio/low_min": 1.130198870669119e-05, "clip_ratio/region_mean": 0.0015549110830761492, "epoch": 0.793002915451895, "grad_norm": 0.1237308606505394, "learning_rate": 1e-06, "loss": 0.0534, "step": 85 }, { "clip_ratio/high_max": 0.00214676595714991, "clip_ratio/high_mean": 0.0007923696375655709, "clip_ratio/low_mean": 0.0006055713265595841, "clip_ratio/low_min": 2.309873980266275e-05, "clip_ratio/region_mean": 0.0013979409995954484, "epoch": 0.8023323615160349, "grad_norm": 0.12092987447977066, "learning_rate": 1e-06, "loss": 0.0417, "step": 86 }, { "clip_ratio/high_max": 0.002106822772475425, "clip_ratio/high_mean": 0.00096737969397509, "clip_ratio/low_mean": 0.0007467069881386124, "clip_ratio/low_min": 0.00010381496940681245, "clip_ratio/region_mean": 0.001714086698484607, "epoch": 0.8116618075801749, "grad_norm": 0.12485820800065994, "learning_rate": 1e-06, "loss": 0.0057, "step": 87 }, { "clip_ratio/high_max": 0.0028218712614034303, "clip_ratio/high_mean": 0.0011347466461302247, "clip_ratio/low_mean": 0.0007566670119558694, "clip_ratio/low_min": 5.205873276281636e-05, "clip_ratio/region_mean": 0.0018914136526291259, "epoch": 0.8209912536443149, "grad_norm": 0.12465086579322815, "learning_rate": 1e-06, "loss": -0.0308, "step": 88 }, { "clip_ratio/high_max": 0.002303111021319637, "clip_ratio/high_mean": 0.0010447673375892919, "clip_ratio/low_mean": 0.0007018665528448764, "clip_ratio/low_min": 1.7585818568477407e-05, "clip_ratio/region_mean": 0.001746633934089914, "epoch": 0.8303206997084548, "grad_norm": 0.12621267139911652, "learning_rate": 1e-06, "loss": -0.0103, "step": 89 }, { "clip_ratio/high_max": 0.0021849671356903855, "clip_ratio/high_mean": 0.0008819183913146844, "clip_ratio/low_mean": 0.0008500138828821946, "clip_ratio/low_min": 0.00010109425329574151, "clip_ratio/region_mean": 0.0017319322796538472, "epoch": 0.8396501457725948, "grad_norm": 0.13000263273715973, "learning_rate": 1e-06, "loss": 0.0413, "step": 90 }, { "clip_ratio/high_max": 0.0021064247921458445, "clip_ratio/high_mean": 0.0009473441823502071, "clip_ratio/low_mean": 0.0008334923531947425, "clip_ratio/low_min": 3.460721654846566e-05, "clip_ratio/region_mean": 0.0017808365519158542, "epoch": 0.8489795918367347, "grad_norm": 0.11576870828866959, "learning_rate": 1e-06, "loss": 0.0214, "step": 91 }, { "clip_ratio/high_max": 0.0024393446547037456, "clip_ratio/high_mean": 0.0009928963190759532, "clip_ratio/low_mean": 0.0007978439698490547, "clip_ratio/low_min": 3.48989251506282e-05, "clip_ratio/region_mean": 0.0017907403234858066, "epoch": 0.8583090379008746, "grad_norm": 0.1261681616306305, "learning_rate": 1e-06, "loss": 0.0153, "step": 92 }, { "clip_ratio/high_max": 0.0019722015858860686, "clip_ratio/high_mean": 0.0009134247156907804, "clip_ratio/low_mean": 0.0009559801292198244, "clip_ratio/low_min": 0.00020906252211716492, "clip_ratio/region_mean": 0.0018694048485485837, "epoch": 0.8676384839650145, "grad_norm": 0.12264375388622284, "learning_rate": 1e-06, "loss": 0.0204, "step": 93 }, { "clip_ratio/high_max": 0.0021383087514550425, "clip_ratio/high_mean": 0.000979981856289669, "clip_ratio/low_mean": 0.0008665554305480327, "clip_ratio/low_min": 8.252622137661092e-05, "clip_ratio/region_mean": 0.0018465372850187123, "epoch": 0.8769679300291545, "grad_norm": 0.11547290533781052, "learning_rate": 1e-06, "loss": -0.0164, "step": 94 }, { "clip_ratio/high_max": 0.0022253611241467297, "clip_ratio/high_mean": 0.0009895051734929439, "clip_ratio/low_mean": 0.0009344983118353412, "clip_ratio/low_min": 3.0166904252837412e-05, "clip_ratio/region_mean": 0.0019240035180700943, "epoch": 0.8862973760932945, "grad_norm": 0.12637680768966675, "learning_rate": 1e-06, "loss": -0.0012, "step": 95 }, { "clip_ratio/high_max": 0.00247694349673111, "clip_ratio/high_mean": 0.0010360104060964659, "clip_ratio/low_mean": 0.001007461607514415, "clip_ratio/low_min": 4.552526843326632e-05, "clip_ratio/region_mean": 0.0020434720208868384, "epoch": 0.8956268221574344, "grad_norm": 0.11487600207328796, "learning_rate": 1e-06, "loss": -0.0219, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015276227678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 614.6165161132812, "completions/mean_terminated_length": 560.6090087890625, "completions/min_length": 66.0, "completions/min_terminated_length": 66.0, "epoch": 1.00932944606414, "grad_norm": 0.12858156859874725, "learning_rate": 1e-06, "loss": 0.0257, "num_tokens": 65426926.0, "reward": 0.5675223469734192, "reward_std": 0.20705661177635193, "rewards/simpleverify_reward/mean": 0.5675223469734192, "rewards/simpleverify_reward/std": 0.49543702602386475, "step": 97 }, { "clip_ratio/high_max": 0.002431881770462496, "clip_ratio/high_mean": 0.000976610428551794, "clip_ratio/low_mean": 0.0005051387042840361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014817491173744202, "epoch": 1.01865889212828, "grad_norm": 0.11597147583961487, "learning_rate": 1e-06, "loss": -0.0219, "step": 98 }, { "clip_ratio/high_max": 0.0023203128439490683, "clip_ratio/high_mean": 0.00102293291274691, "clip_ratio/low_mean": 0.000633500602816639, "clip_ratio/low_min": 3.873580408253474e-05, "clip_ratio/region_mean": 0.0016564334946451709, "epoch": 1.0279883381924197, "grad_norm": 0.1402452141046524, "learning_rate": 1e-06, "loss": -0.0266, "step": 99 }, { "clip_ratio/high_max": 0.0019849219970637932, "clip_ratio/high_mean": 0.000828961222396174, "clip_ratio/low_mean": 0.0007217003412733902, "clip_ratio/low_min": 2.388379925832851e-05, "clip_ratio/region_mean": 0.0015506615691265324, "epoch": 1.0373177842565597, "grad_norm": 0.12131818383932114, "learning_rate": 1e-06, "loss": 0.0428, "step": 100 }, { "clip_ratio/high_max": 0.0022726400347892195, "clip_ratio/high_mean": 0.0009297973647335311, "clip_ratio/low_mean": 0.0006083200987632154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001538117474410683, "epoch": 1.0466472303206997, "grad_norm": 0.12470761686563492, "learning_rate": 1e-06, "loss": -0.0268, "step": 101 }, { "clip_ratio/high_max": 0.0020679739682236686, "clip_ratio/high_mean": 0.0008917481718526687, "clip_ratio/low_mean": 0.0007550644750153879, "clip_ratio/low_min": 5.8093863117392175e-05, "clip_ratio/region_mean": 0.0016468126632389612, "epoch": 1.0559766763848397, "grad_norm": 0.12059267610311508, "learning_rate": 1e-06, "loss": -0.004, "step": 102 }, { "clip_ratio/high_max": 0.0017993911460507661, "clip_ratio/high_mean": 0.0008291297963296529, "clip_ratio/low_mean": 0.0007258171608555131, "clip_ratio/low_min": 2.871238575608004e-05, "clip_ratio/region_mean": 0.0015549469462712295, "epoch": 1.0653061224489795, "grad_norm": 0.12149995565414429, "learning_rate": 1e-06, "loss": -0.0004, "step": 103 }, { "clip_ratio/high_max": 0.0026587499014567584, "clip_ratio/high_mean": 0.0010635393882694189, "clip_ratio/low_mean": 0.0006956002580409404, "clip_ratio/low_min": 2.889693405450089e-05, "clip_ratio/region_mean": 0.0017591396208445076, "epoch": 1.0746355685131195, "grad_norm": 0.11654109507799149, "learning_rate": 1e-06, "loss": -0.0156, "step": 104 }, { "clip_ratio/high_max": 0.00191625867591938, "clip_ratio/high_mean": 0.0008249730199167971, "clip_ratio/low_mean": 0.0007710090249020141, "clip_ratio/low_min": 2.731515996856615e-05, "clip_ratio/region_mean": 0.001595982084836578, "epoch": 1.0839650145772595, "grad_norm": 0.11643777787685394, "learning_rate": 1e-06, "loss": 0.0141, "step": 105 }, { "clip_ratio/high_max": 0.001958127126272302, "clip_ratio/high_mean": 0.0007795016499585472, "clip_ratio/low_mean": 0.0009982234169001458, "clip_ratio/low_min": 0.00026187406183453277, "clip_ratio/region_mean": 0.00177772507595364, "epoch": 1.0932944606413995, "grad_norm": 0.1366950124502182, "learning_rate": 1e-06, "loss": 0.1203, "step": 106 }, { "clip_ratio/high_max": 0.0021125095008756034, "clip_ratio/high_mean": 0.0009125962351390626, "clip_ratio/low_mean": 0.0008739082113606855, "clip_ratio/low_min": 0.00012220687676745, "clip_ratio/region_mean": 0.0017865043992060237, "epoch": 1.1026239067055394, "grad_norm": 0.11763796210289001, "learning_rate": 1e-06, "loss": 0.0286, "step": 107 }, { "clip_ratio/high_max": 0.002092786569846794, "clip_ratio/high_mean": 0.0008877935415512184, "clip_ratio/low_mean": 0.0009199606429319829, "clip_ratio/low_min": 0.00011456518950581085, "clip_ratio/region_mean": 0.0018077541753882542, "epoch": 1.1119533527696792, "grad_norm": 0.11116166412830353, "learning_rate": 1e-06, "loss": 0.0277, "step": 108 }, { "clip_ratio/high_max": 0.002078748613712378, "clip_ratio/high_mean": 0.0009001554899441544, "clip_ratio/low_mean": 0.0009645700993132778, "clip_ratio/low_min": 0.0001370410091112717, "clip_ratio/region_mean": 0.0018647255783434957, "epoch": 1.1212827988338192, "grad_norm": 0.1289977729320526, "learning_rate": 1e-06, "loss": 0.0237, "step": 109 }, { "clip_ratio/high_max": 0.002635292745253537, "clip_ratio/high_mean": 0.0010960557883663569, "clip_ratio/low_mean": 0.0007941956391732674, "clip_ratio/low_min": 1.323311425949214e-05, "clip_ratio/region_mean": 0.0018902514420915395, "epoch": 1.1306122448979592, "grad_norm": 0.11401235312223434, "learning_rate": 1e-06, "loss": -0.0435, "step": 110 }, { "clip_ratio/high_max": 0.002264108079543803, "clip_ratio/high_mean": 0.0009294000301451888, "clip_ratio/low_mean": 0.0008951653653639369, "clip_ratio/low_min": 2.3484310077037662e-05, "clip_ratio/region_mean": 0.0018245654136990197, "epoch": 1.1399416909620992, "grad_norm": 0.11076462268829346, "learning_rate": 1e-06, "loss": -0.0093, "step": 111 }, { "clip_ratio/high_max": 0.00219617073162226, "clip_ratio/high_mean": 0.0009168490469164681, "clip_ratio/low_mean": 0.0009470912464166759, "clip_ratio/low_min": 8.236750727519393e-05, "clip_ratio/region_mean": 0.0018639402915141545, "epoch": 1.149271137026239, "grad_norm": 0.13221172988414764, "learning_rate": 1e-06, "loss": 0.0305, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 615.2047119140625, "completions/mean_terminated_length": 559.9539184570312, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.158600583090379, "grad_norm": 0.1220395565032959, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 74750804.0, "reward": 0.5720564126968384, "reward_std": 0.20371243357658386, "rewards/simpleverify_reward/mean": 0.5720563530921936, "rewards/simpleverify_reward/std": 0.49479788541793823, "step": 113 }, { "clip_ratio/high_max": 0.0019257487329014111, "clip_ratio/high_mean": 0.0008828264908515848, "clip_ratio/low_mean": 0.0006228404581634095, "clip_ratio/low_min": 2.6074259949382395e-05, "clip_ratio/region_mean": 0.001505666925368132, "epoch": 1.167930029154519, "grad_norm": 0.12624193727970123, "learning_rate": 1e-06, "loss": -0.0317, "step": 114 }, { "clip_ratio/high_max": 0.001973774295038311, "clip_ratio/high_mean": 0.0008221334392146673, "clip_ratio/low_mean": 0.0006215740722836927, "clip_ratio/low_min": 5.051659627497429e-05, "clip_ratio/region_mean": 0.0014437074896704871, "epoch": 1.177259475218659, "grad_norm": 0.11821143329143524, "learning_rate": 1e-06, "loss": 0.0097, "step": 115 }, { "clip_ratio/high_max": 0.002182784468459431, "clip_ratio/high_mean": 0.0009378793329233304, "clip_ratio/low_mean": 0.0006621991506108316, "clip_ratio/low_min": 3.928378919226816e-05, "clip_ratio/region_mean": 0.0016000784307834692, "epoch": 1.186588921282799, "grad_norm": 0.12873928248882294, "learning_rate": 1e-06, "loss": 0.0364, "step": 116 }, { "clip_ratio/high_max": 0.0020524796891550068, "clip_ratio/high_mean": 0.0008188594129023841, "clip_ratio/low_mean": 0.0006627361017308431, "clip_ratio/low_min": 2.341665640415158e-05, "clip_ratio/region_mean": 0.0014815955073572695, "epoch": 1.1959183673469387, "grad_norm": 0.11923913657665253, "learning_rate": 1e-06, "loss": 0.0318, "step": 117 }, { "clip_ratio/high_max": 0.0018825178922270425, "clip_ratio/high_mean": 0.0007956151785037946, "clip_ratio/low_mean": 0.0007509429906349396, "clip_ratio/low_min": 8.08084105301532e-05, "clip_ratio/region_mean": 0.0015465581745957024, "epoch": 1.2052478134110787, "grad_norm": 0.11949536949396133, "learning_rate": 1e-06, "loss": 0.0025, "step": 118 }, { "clip_ratio/high_max": 0.002166791364288656, "clip_ratio/high_mean": 0.0009007321896206122, "clip_ratio/low_mean": 0.0006874364808027167, "clip_ratio/low_min": 5.688313376595033e-05, "clip_ratio/region_mean": 0.001588168648595456, "epoch": 1.2145772594752187, "grad_norm": 0.13457559049129486, "learning_rate": 1e-06, "loss": 0.0208, "step": 119 }, { "clip_ratio/high_max": 0.002059636353806127, "clip_ratio/high_mean": 0.0008873351507645566, "clip_ratio/low_mean": 0.0006663190561084775, "clip_ratio/low_min": 1.4497796655632555e-05, "clip_ratio/region_mean": 0.0015536541977780871, "epoch": 1.2239067055393587, "grad_norm": 0.12186510115861893, "learning_rate": 1e-06, "loss": -0.0162, "step": 120 }, { "clip_ratio/high_max": 0.0021562619294854812, "clip_ratio/high_mean": 0.0009943760924215894, "clip_ratio/low_mean": 0.0007377081710728817, "clip_ratio/low_min": 7.509787974413484e-05, "clip_ratio/region_mean": 0.0017320842380286194, "epoch": 1.2332361516034984, "grad_norm": 0.1360846310853958, "learning_rate": 1e-06, "loss": -0.0297, "step": 121 }, { "clip_ratio/high_max": 0.0024428275719401427, "clip_ratio/high_mean": 0.0009363184908579569, "clip_ratio/low_mean": 0.0007870414374337997, "clip_ratio/low_min": 3.312385433673626e-05, "clip_ratio/region_mean": 0.0017233599501196295, "epoch": 1.2425655976676384, "grad_norm": 0.11625058948993683, "learning_rate": 1e-06, "loss": -0.0285, "step": 122 }, { "clip_ratio/high_max": 0.002062764200672973, "clip_ratio/high_mean": 0.0008563195460737916, "clip_ratio/low_mean": 0.0007933507495181402, "clip_ratio/low_min": 6.350878902594559e-05, "clip_ratio/region_mean": 0.0016496703028678894, "epoch": 1.2518950437317784, "grad_norm": 0.12811847031116486, "learning_rate": 1e-06, "loss": 0.0295, "step": 123 }, { "clip_ratio/high_max": 0.0021896275829931255, "clip_ratio/high_mean": 0.0009338609488622751, "clip_ratio/low_mean": 0.0008893635367712704, "clip_ratio/low_min": 7.459127209585859e-05, "clip_ratio/region_mean": 0.0018232244910905138, "epoch": 1.2612244897959184, "grad_norm": 0.12722264230251312, "learning_rate": 1e-06, "loss": 0.0183, "step": 124 }, { "clip_ratio/high_max": 0.002355482280108845, "clip_ratio/high_mean": 0.0010034805709437933, "clip_ratio/low_mean": 0.0009903699483402306, "clip_ratio/low_min": 8.284199793706648e-05, "clip_ratio/region_mean": 0.0019938504847232252, "epoch": 1.2705539358600584, "grad_norm": 0.12042288482189178, "learning_rate": 1e-06, "loss": -0.0039, "step": 125 }, { "clip_ratio/high_max": 0.0024233122530858964, "clip_ratio/high_mean": 0.0009240177714673337, "clip_ratio/low_mean": 0.0010230529424006818, "clip_ratio/low_min": 4.78364836453693e-05, "clip_ratio/region_mean": 0.0019470706974971108, "epoch": 1.2798833819241984, "grad_norm": 0.11858785897493362, "learning_rate": 1e-06, "loss": 0.0466, "step": 126 }, { "clip_ratio/high_max": 0.00224157049524365, "clip_ratio/high_mean": 0.0009216578473569825, "clip_ratio/low_mean": 0.0008162433714460349, "clip_ratio/low_min": 4.437068855622783e-05, "clip_ratio/region_mean": 0.0017379012497258373, "epoch": 1.2892128279883381, "grad_norm": 0.1165943369269371, "learning_rate": 1e-06, "loss": 0.0068, "step": 127 }, { "clip_ratio/high_max": 0.0023815598760847934, "clip_ratio/high_mean": 0.0009981660696212202, "clip_ratio/low_mean": 0.0009986253498937003, "clip_ratio/low_min": 0.00015665531645936426, "clip_ratio/region_mean": 0.001996791426790878, "epoch": 1.2985422740524781, "grad_norm": 0.1301276534795761, "learning_rate": 1e-06, "loss": -0.0182, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0161830357142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 626.4522705078125, "completions/mean_terminated_length": 569.3807983398438, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 1.3078717201166181, "grad_norm": 0.13251464068889618, "learning_rate": 1e-06, "loss": -0.0269, "num_tokens": 84229263.0, "reward": 0.5663365125656128, "reward_std": 0.20097027719020844, "rewards/simpleverify_reward/mean": 0.5663365125656128, "rewards/simpleverify_reward/std": 0.4955971837043762, "step": 129 }, { "clip_ratio/high_max": 0.002244508359581232, "clip_ratio/high_mean": 0.0008610586082795635, "clip_ratio/low_mean": 0.0005041469321440673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013652055495185778, "epoch": 1.3172011661807579, "grad_norm": 0.12286481261253357, "learning_rate": 1e-06, "loss": -0.015, "step": 130 }, { "clip_ratio/high_max": 0.0018510533482185565, "clip_ratio/high_mean": 0.0007770324718876509, "clip_ratio/low_mean": 0.0005723797066821135, "clip_ratio/low_min": 1.3775622392131481e-05, "clip_ratio/region_mean": 0.0013494121994881425, "epoch": 1.3265306122448979, "grad_norm": 0.12383156269788742, "learning_rate": 1e-06, "loss": 0.0174, "step": 131 }, { "clip_ratio/high_max": 0.0025119927668129094, "clip_ratio/high_mean": 0.0009405581695318688, "clip_ratio/low_mean": 0.0006287046453508083, "clip_ratio/low_min": 2.5759878553799354e-05, "clip_ratio/region_mean": 0.001569262778502889, "epoch": 1.3358600583090379, "grad_norm": 0.127622589468956, "learning_rate": 1e-06, "loss": -0.0127, "step": 132 }, { "clip_ratio/high_max": 0.001959026547410758, "clip_ratio/high_mean": 0.0008821384853945347, "clip_ratio/low_mean": 0.000682516551023582, "clip_ratio/low_min": 2.9925784474471584e-05, "clip_ratio/region_mean": 0.0015646550527890213, "epoch": 1.3451895043731779, "grad_norm": 0.12017498910427094, "learning_rate": 1e-06, "loss": 0.036, "step": 133 }, { "clip_ratio/high_max": 0.0018977917861775495, "clip_ratio/high_mean": 0.0008048139570746571, "clip_ratio/low_mean": 0.0007295015384443104, "clip_ratio/low_min": 9.614997634344036e-05, "clip_ratio/region_mean": 0.0015343154809670523, "epoch": 1.3545189504373178, "grad_norm": 0.11897505819797516, "learning_rate": 1e-06, "loss": 0.016, "step": 134 }, { "clip_ratio/high_max": 0.002295372418302577, "clip_ratio/high_mean": 0.0008898319210857153, "clip_ratio/low_mean": 0.0007900418477220228, "clip_ratio/low_min": 8.652223641547607e-05, "clip_ratio/region_mean": 0.001679873741522897, "epoch": 1.3638483965014578, "grad_norm": 0.11808433383703232, "learning_rate": 1e-06, "loss": -0.0048, "step": 135 }, { "clip_ratio/high_max": 0.002041238738456741, "clip_ratio/high_mean": 0.0009150742498604814, "clip_ratio/low_mean": 0.0007647128386452096, "clip_ratio/low_min": 3.2516913051949814e-05, "clip_ratio/region_mean": 0.0016797870848677121, "epoch": 1.3731778425655976, "grad_norm": 0.12002668529748917, "learning_rate": 1e-06, "loss": 0.0086, "step": 136 }, { "clip_ratio/high_max": 0.002025498921284452, "clip_ratio/high_mean": 0.0008324968357555917, "clip_ratio/low_mean": 0.00083963189899805, "clip_ratio/low_min": 6.379394471878186e-05, "clip_ratio/region_mean": 0.0016721287029213272, "epoch": 1.3825072886297376, "grad_norm": 0.12069258838891983, "learning_rate": 1e-06, "loss": 0.0195, "step": 137 }, { "clip_ratio/high_max": 0.0021086534798087087, "clip_ratio/high_mean": 0.0009510489690001123, "clip_ratio/low_mean": 0.0009444084098504391, "clip_ratio/low_min": 4.686885313276434e-05, "clip_ratio/region_mean": 0.0018954574152303394, "epoch": 1.3918367346938776, "grad_norm": 0.12374761700630188, "learning_rate": 1e-06, "loss": -0.0188, "step": 138 }, { "clip_ratio/high_max": 0.0021936041266599204, "clip_ratio/high_mean": 0.0009361663687741384, "clip_ratio/low_mean": 0.0008298485681734746, "clip_ratio/low_min": 0.0001215358797708177, "clip_ratio/region_mean": 0.001766014909662772, "epoch": 1.4011661807580174, "grad_norm": 0.13761506974697113, "learning_rate": 1e-06, "loss": -0.0225, "step": 139 }, { "clip_ratio/high_max": 0.0016457102010463132, "clip_ratio/high_mean": 0.000751510621739726, "clip_ratio/low_mean": 0.0008414258827542653, "clip_ratio/low_min": 7.546025244664634e-05, "clip_ratio/region_mean": 0.001592936518136412, "epoch": 1.4104956268221573, "grad_norm": 0.10721629112958908, "learning_rate": 1e-06, "loss": 0.0296, "step": 140 }, { "clip_ratio/high_max": 0.0017921959442901425, "clip_ratio/high_mean": 0.0008461397137580207, "clip_ratio/low_mean": 0.0009921405762725044, "clip_ratio/low_min": 0.00014551327512890566, "clip_ratio/region_mean": 0.0018382802954874933, "epoch": 1.4198250728862973, "grad_norm": 0.11098704487085342, "learning_rate": 1e-06, "loss": 0.0299, "step": 141 }, { "clip_ratio/high_max": 0.0019253180216765031, "clip_ratio/high_mean": 0.0008908760255508241, "clip_ratio/low_mean": 0.0009555358192301355, "clip_ratio/low_min": 6.691736143693561e-05, "clip_ratio/region_mean": 0.0018464118475094438, "epoch": 1.4291545189504373, "grad_norm": 0.11369307339191437, "learning_rate": 1e-06, "loss": -0.0138, "step": 142 }, { "clip_ratio/high_max": 0.0018603677599458024, "clip_ratio/high_mean": 0.0007390261171167367, "clip_ratio/low_mean": 0.0009189535467157839, "clip_ratio/low_min": 4.798382678927737e-05, "clip_ratio/region_mean": 0.0016579797120357398, "epoch": 1.4384839650145773, "grad_norm": 0.11071496456861496, "learning_rate": 1e-06, "loss": 0.0447, "step": 143 }, { "clip_ratio/high_max": 0.002301324981090147, "clip_ratio/high_mean": 0.0009746900905156508, "clip_ratio/low_mean": 0.0009977697536669439, "clip_ratio/low_min": 0.00011829928462248063, "clip_ratio/region_mean": 0.0019724598532775417, "epoch": 1.4478134110787173, "grad_norm": 0.11918730288743973, "learning_rate": 1e-06, "loss": -0.0001, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0156947544642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 631.8088989257812, "completions/mean_terminated_length": 576.5723266601562, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 1.457142857142857, "grad_norm": 0.12190185487270355, "learning_rate": 1e-06, "loss": 0.0174, "num_tokens": 93799275.0, "reward": 0.5670340657234192, "reward_std": 0.2037140280008316, "rewards/simpleverify_reward/mean": 0.5670340657234192, "rewards/simpleverify_reward/std": 0.49550333619117737, "step": 145 }, { "clip_ratio/high_max": 0.001934599753440125, "clip_ratio/high_mean": 0.0008014005852601258, "clip_ratio/low_mean": 0.0005292152745823842, "clip_ratio/low_min": 0.0001035796876749373, "clip_ratio/region_mean": 0.00133061582891969, "epoch": 1.466472303206997, "grad_norm": 0.12768934667110443, "learning_rate": 1e-06, "loss": 0.015, "step": 146 }, { "clip_ratio/high_max": 0.002129538555891486, "clip_ratio/high_mean": 0.000889877810550388, "clip_ratio/low_mean": 0.0005880440567125333, "clip_ratio/low_min": 1.7811342331697233e-05, "clip_ratio/region_mean": 0.001477921865443932, "epoch": 1.475801749271137, "grad_norm": 0.12811371684074402, "learning_rate": 1e-06, "loss": -0.0041, "step": 147 }, { "clip_ratio/high_max": 0.0017311150877503678, "clip_ratio/high_mean": 0.000799733756139176, "clip_ratio/low_mean": 0.0007023644120636163, "clip_ratio/low_min": 6.039615436748136e-05, "clip_ratio/region_mean": 0.0015020981809357181, "epoch": 1.485131195335277, "grad_norm": 0.12255005538463593, "learning_rate": 1e-06, "loss": 0.0197, "step": 148 }, { "clip_ratio/high_max": 0.0018076451087836176, "clip_ratio/high_mean": 0.0008736829440749716, "clip_ratio/low_mean": 0.0005886210074095288, "clip_ratio/low_min": 7.807923429936636e-05, "clip_ratio/region_mean": 0.0014623039351135958, "epoch": 1.4944606413994168, "grad_norm": 0.14571623504161835, "learning_rate": 1e-06, "loss": -0.0145, "step": 149 }, { "clip_ratio/high_max": 0.0019954573399445508, "clip_ratio/high_mean": 0.0007522687374148518, "clip_ratio/low_mean": 0.0006291777481237659, "clip_ratio/low_min": 6.679319722024957e-05, "clip_ratio/region_mean": 0.00138144650918548, "epoch": 1.5037900874635568, "grad_norm": 0.11125117540359497, "learning_rate": 1e-06, "loss": 0.0193, "step": 150 }, { "clip_ratio/high_max": 0.0020873405737802386, "clip_ratio/high_mean": 0.0008825456297927303, "clip_ratio/low_mean": 0.0006258610901568318, "clip_ratio/low_min": 1.3426422810880467e-05, "clip_ratio/region_mean": 0.001508406701759668, "epoch": 1.5131195335276968, "grad_norm": 0.11599573493003845, "learning_rate": 1e-06, "loss": -0.0192, "step": 151 }, { "clip_ratio/high_max": 0.001997087871131953, "clip_ratio/high_mean": 0.0008303733120555989, "clip_ratio/low_mean": 0.0006727317249897169, "clip_ratio/low_min": 1.2969495401193853e-05, "clip_ratio/region_mean": 0.0015031050388643052, "epoch": 1.5224489795918368, "grad_norm": 0.11316283792257309, "learning_rate": 1e-06, "loss": 0.0108, "step": 152 }, { "clip_ratio/high_max": 0.0017787336255423725, "clip_ratio/high_mean": 0.0007943731470732018, "clip_ratio/low_mean": 0.0007544668769696727, "clip_ratio/low_min": 2.94999599645962e-05, "clip_ratio/region_mean": 0.0015488400385947898, "epoch": 1.5317784256559768, "grad_norm": 0.12101984024047852, "learning_rate": 1e-06, "loss": 0.0291, "step": 153 }, { "clip_ratio/high_max": 0.002170098086935468, "clip_ratio/high_mean": 0.0009834577649598941, "clip_ratio/low_mean": 0.0009267411569453543, "clip_ratio/low_min": 0.00012742676153720822, "clip_ratio/region_mean": 0.0019101989055343438, "epoch": 1.5411078717201168, "grad_norm": 0.7688888311386108, "learning_rate": 1e-06, "loss": 0.0301, "step": 154 }, { "clip_ratio/high_max": 0.0020247332358849235, "clip_ratio/high_mean": 0.0009044854850799311, "clip_ratio/low_mean": 0.0007733644488325808, "clip_ratio/low_min": 4.238462133798748e-05, "clip_ratio/region_mean": 0.001677849912084639, "epoch": 1.5504373177842565, "grad_norm": 0.12675753235816956, "learning_rate": 1e-06, "loss": 0.0423, "step": 155 }, { "clip_ratio/high_max": 0.0023779643124726135, "clip_ratio/high_mean": 0.0008760016207816079, "clip_ratio/low_mean": 0.0007575586478196783, "clip_ratio/low_min": 3.948215817217715e-05, "clip_ratio/region_mean": 0.001633560299524106, "epoch": 1.5597667638483965, "grad_norm": 0.12447439134120941, "learning_rate": 1e-06, "loss": 0.0014, "step": 156 }, { "clip_ratio/high_max": 0.0017608412563276943, "clip_ratio/high_mean": 0.0008488712228427175, "clip_ratio/low_mean": 0.0008813729036774021, "clip_ratio/low_min": 6.444994323828723e-05, "clip_ratio/region_mean": 0.0017302441701758653, "epoch": 1.5690962099125363, "grad_norm": 0.11563175916671753, "learning_rate": 1e-06, "loss": 0.0275, "step": 157 }, { "clip_ratio/high_max": 0.002139774202078115, "clip_ratio/high_mean": 0.0008882243528205436, "clip_ratio/low_mean": 0.0008602012130722869, "clip_ratio/low_min": 1.838776188378688e-05, "clip_ratio/region_mean": 0.0017484255804447457, "epoch": 1.5784256559766763, "grad_norm": 0.139420747756958, "learning_rate": 1e-06, "loss": -0.0212, "step": 158 }, { "clip_ratio/high_max": 0.002397394935542252, "clip_ratio/high_mean": 0.0010904006740020122, "clip_ratio/low_mean": 0.001042817508277949, "clip_ratio/low_min": 5.8999250541091897e-05, "clip_ratio/region_mean": 0.0021332182150217704, "epoch": 1.5877551020408163, "grad_norm": 0.11631916463375092, "learning_rate": 1e-06, "loss": -0.0289, "step": 159 }, { "clip_ratio/high_max": 0.002245377858344, "clip_ratio/high_mean": 0.001057976645824965, "clip_ratio/low_mean": 0.0009666882106103003, "clip_ratio/low_min": 3.6743091186508536e-05, "clip_ratio/region_mean": 0.002024664863711223, "epoch": 1.5970845481049563, "grad_norm": 0.11942266672849655, "learning_rate": 1e-06, "loss": -0.0405, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017229352678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 626.25439453125, "completions/mean_terminated_length": 565.4248657226562, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 1.6064139941690962, "grad_norm": 0.13169580698013306, "learning_rate": 1e-06, "loss": -0.0129, "num_tokens": 103211242.0, "reward": 0.5811942219734192, "reward_std": 0.20458707213401794, "rewards/simpleverify_reward/mean": 0.5811942219734192, "rewards/simpleverify_reward/std": 0.4933806359767914, "step": 161 }, { "clip_ratio/high_max": 0.0018166435002058279, "clip_ratio/high_mean": 0.0007460619381163269, "clip_ratio/low_mean": 0.000638976736809127, "clip_ratio/low_min": 5.0837878916354384e-05, "clip_ratio/region_mean": 0.001385038693115348, "epoch": 1.6157434402332362, "grad_norm": 0.1320454329252243, "learning_rate": 1e-06, "loss": 0.039, "step": 162 }, { "clip_ratio/high_max": 0.002020539850491332, "clip_ratio/high_mean": 0.0008916126080293907, "clip_ratio/low_mean": 0.0006810777140344726, "clip_ratio/low_min": 4.205938330414938e-05, "clip_ratio/region_mean": 0.001572690329339821, "epoch": 1.6250728862973762, "grad_norm": 0.11958145350217819, "learning_rate": 1e-06, "loss": 0.0031, "step": 163 }, { "clip_ratio/high_max": 0.00221190313459374, "clip_ratio/high_mean": 0.0009466717528994195, "clip_ratio/low_mean": 0.0006517537076433655, "clip_ratio/low_min": 2.415474136796547e-05, "clip_ratio/region_mean": 0.0015984254860086367, "epoch": 1.634402332361516, "grad_norm": 0.1377788484096527, "learning_rate": 1e-06, "loss": 0.0341, "step": 164 }, { "clip_ratio/high_max": 0.002054684522590833, "clip_ratio/high_mean": 0.0008563596411477192, "clip_ratio/low_mean": 0.0006744181337126065, "clip_ratio/low_min": 1.2262114978511818e-05, "clip_ratio/region_mean": 0.001530777801235672, "epoch": 1.643731778425656, "grad_norm": 0.14793626964092255, "learning_rate": 1e-06, "loss": 0.009, "step": 165 }, { "clip_ratio/high_max": 0.002200584036472719, "clip_ratio/high_mean": 0.0009079221599677112, "clip_ratio/low_mean": 0.0005505128847289598, "clip_ratio/low_min": 2.5492212444078177e-05, "clip_ratio/region_mean": 0.0014584350319637451, "epoch": 1.6530612244897958, "grad_norm": 0.11855659633874893, "learning_rate": 1e-06, "loss": -0.0269, "step": 166 }, { "clip_ratio/high_max": 0.0021026923022873234, "clip_ratio/high_mean": 0.0009449555127503118, "clip_ratio/low_mean": 0.0006610453228859114, "clip_ratio/low_min": 4.259185789123876e-05, "clip_ratio/region_mean": 0.0016060008347267285, "epoch": 1.6623906705539357, "grad_norm": 0.12084218114614487, "learning_rate": 1e-06, "loss": 0.0038, "step": 167 }, { "clip_ratio/high_max": 0.002057164747384377, "clip_ratio/high_mean": 0.000906317429326009, "clip_ratio/low_mean": 0.0008051304685068317, "clip_ratio/low_min": 4.021916174679063e-05, "clip_ratio/region_mean": 0.0017114478832809255, "epoch": 1.6717201166180757, "grad_norm": 0.119272381067276, "learning_rate": 1e-06, "loss": 0.0093, "step": 168 }, { "clip_ratio/high_max": 0.0020466544410737697, "clip_ratio/high_mean": 0.0008973101048468379, "clip_ratio/low_mean": 0.0007674016487726476, "clip_ratio/low_min": 9.861155376711395e-06, "clip_ratio/region_mean": 0.0016647117445245385, "epoch": 1.6810495626822157, "grad_norm": 0.1259709596633911, "learning_rate": 1e-06, "loss": 0.0177, "step": 169 }, { "clip_ratio/high_max": 0.0022445550057454966, "clip_ratio/high_mean": 0.0010060079221148044, "clip_ratio/low_mean": 0.0008812320756987901, "clip_ratio/low_min": 5.3990890592103824e-05, "clip_ratio/region_mean": 0.001887240017822478, "epoch": 1.6903790087463557, "grad_norm": 0.12784640491008759, "learning_rate": 1e-06, "loss": -0.0259, "step": 170 }, { "clip_ratio/high_max": 0.0021894609817536548, "clip_ratio/high_mean": 0.0008671266623423435, "clip_ratio/low_mean": 0.0008860656926117372, "clip_ratio/low_min": 3.362625648151152e-05, "clip_ratio/region_mean": 0.0017531924095237628, "epoch": 1.6997084548104957, "grad_norm": 0.12520450353622437, "learning_rate": 1e-06, "loss": 0.0311, "step": 171 }, { "clip_ratio/high_max": 0.0021982111138640903, "clip_ratio/high_mean": 0.0009408265395904891, "clip_ratio/low_mean": 0.0008430890666204505, "clip_ratio/low_min": 4.45436007794342e-05, "clip_ratio/region_mean": 0.0017839156425907277, "epoch": 1.7090379008746357, "grad_norm": 0.12349167466163635, "learning_rate": 1e-06, "loss": 0.0043, "step": 172 }, { "clip_ratio/high_max": 0.002479846116330009, "clip_ratio/high_mean": 0.0010699710801418405, "clip_ratio/low_mean": 0.0008599229595347424, "clip_ratio/low_min": 0.00010392668446002062, "clip_ratio/region_mean": 0.001929894024215173, "epoch": 1.7183673469387755, "grad_norm": 0.1285819113254547, "learning_rate": 1e-06, "loss": -0.0084, "step": 173 }, { "clip_ratio/high_max": 0.002081625185383018, "clip_ratio/high_mean": 0.0009965480858227238, "clip_ratio/low_mean": 0.0011112326483271318, "clip_ratio/low_min": 9.675911587692099e-05, "clip_ratio/region_mean": 0.0021077807105029933, "epoch": 1.7276967930029155, "grad_norm": 0.12089233845472336, "learning_rate": 1e-06, "loss": 0.0073, "step": 174 }, { "clip_ratio/high_max": 0.002426785562420264, "clip_ratio/high_mean": 0.001048621859808918, "clip_ratio/low_mean": 0.0009528203845547978, "clip_ratio/low_min": 5.592873822024558e-05, "clip_ratio/region_mean": 0.0020014422334497795, "epoch": 1.7370262390670554, "grad_norm": 0.1163436770439148, "learning_rate": 1e-06, "loss": -0.0027, "step": 175 }, { "clip_ratio/high_max": 0.002088989356707316, "clip_ratio/high_mean": 0.0009251941373804584, "clip_ratio/low_mean": 0.0009630561089579714, "clip_ratio/low_min": 0.00010623957678035367, "clip_ratio/region_mean": 0.0018882502481574193, "epoch": 1.7463556851311952, "grad_norm": 0.12206751108169556, "learning_rate": 1e-06, "loss": -0.0017, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020786830357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 647.4921264648438, "completions/mean_terminated_length": 574.286865234375, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.7556851311953352, "grad_norm": 0.13171830773353577, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 112685569.0, "reward": 0.5671038031578064, "reward_std": 0.19675789773464203, "rewards/simpleverify_reward/mean": 0.5671038031578064, "rewards/simpleverify_reward/std": 0.49549391865730286, "step": 177 }, { "clip_ratio/high_max": 0.0018599505237943958, "clip_ratio/high_mean": 0.0008041290438995929, "clip_ratio/low_mean": 0.0006291494328252156, "clip_ratio/low_min": 6.28319066890981e-05, "clip_ratio/region_mean": 0.0014332784739963245, "epoch": 1.7650145772594752, "grad_norm": 0.11030884832143784, "learning_rate": 1e-06, "loss": -0.0225, "step": 178 }, { "clip_ratio/high_max": 0.002045426059339661, "clip_ratio/high_mean": 0.0008600633227615617, "clip_ratio/low_mean": 0.0006418446682801004, "clip_ratio/low_min": 6.908085924806073e-05, "clip_ratio/region_mean": 0.0015019079874036834, "epoch": 1.7743440233236152, "grad_norm": 0.12863300740718842, "learning_rate": 1e-06, "loss": 0.0056, "step": 179 }, { "clip_ratio/high_max": 0.0019250913974246942, "clip_ratio/high_mean": 0.0007687804900342599, "clip_ratio/low_mean": 0.0006291926765698008, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013979731484141666, "epoch": 1.7836734693877552, "grad_norm": 0.12056858837604523, "learning_rate": 1e-06, "loss": 0.0077, "step": 180 }, { "clip_ratio/high_max": 0.0022541451726283412, "clip_ratio/high_mean": 0.0008016723350010579, "clip_ratio/low_mean": 0.0006185975562402746, "clip_ratio/low_min": 6.308463071036385e-05, "clip_ratio/region_mean": 0.0014202698766894173, "epoch": 1.7930029154518952, "grad_norm": 0.15309050679206848, "learning_rate": 1e-06, "loss": -0.0147, "step": 181 }, { "clip_ratio/high_max": 0.0018194957010564394, "clip_ratio/high_mean": 0.0006944321567061706, "clip_ratio/low_mean": 0.0007426109277730575, "clip_ratio/low_min": 7.410918533423683e-05, "clip_ratio/region_mean": 0.0014370430872077122, "epoch": 1.802332361516035, "grad_norm": 0.12839162349700928, "learning_rate": 1e-06, "loss": 0.0465, "step": 182 }, { "clip_ratio/high_max": 0.0018399977816443425, "clip_ratio/high_mean": 0.0007342226945183938, "clip_ratio/low_mean": 0.0006821313309046673, "clip_ratio/low_min": 3.6732286389451474e-05, "clip_ratio/region_mean": 0.001416354047250934, "epoch": 1.811661807580175, "grad_norm": 0.1214296966791153, "learning_rate": 1e-06, "loss": 0.0409, "step": 183 }, { "clip_ratio/high_max": 0.002079776138998568, "clip_ratio/high_mean": 0.0008031514171307208, "clip_ratio/low_mean": 0.0006509840168291703, "clip_ratio/low_min": 5.0035926506097894e-05, "clip_ratio/region_mean": 0.0014541354539687745, "epoch": 1.820991253644315, "grad_norm": 0.12939366698265076, "learning_rate": 1e-06, "loss": 0.0087, "step": 184 }, { "clip_ratio/high_max": 0.0020362052418931853, "clip_ratio/high_mean": 0.0008464112052024575, "clip_ratio/low_mean": 0.0006757761620974634, "clip_ratio/low_min": 5.552329639613163e-05, "clip_ratio/region_mean": 0.0015221873582049739, "epoch": 1.8303206997084547, "grad_norm": 0.11936371773481369, "learning_rate": 1e-06, "loss": 0.0193, "step": 185 }, { "clip_ratio/high_max": 0.0019120335055049509, "clip_ratio/high_mean": 0.0007396703203994548, "clip_ratio/low_mean": 0.0008308130290970439, "clip_ratio/low_min": 4.299291413190076e-05, "clip_ratio/region_mean": 0.0015704833494964987, "epoch": 1.8396501457725947, "grad_norm": 0.12239719927310944, "learning_rate": 1e-06, "loss": 0.0224, "step": 186 }, { "clip_ratio/high_max": 0.002017628590692766, "clip_ratio/high_mean": 0.0008814301163511118, "clip_ratio/low_mean": 0.0008645274610898923, "clip_ratio/low_min": 0.00010835422563104657, "clip_ratio/region_mean": 0.0017459575537941419, "epoch": 1.8489795918367347, "grad_norm": 0.12260110676288605, "learning_rate": 1e-06, "loss": 0.0248, "step": 187 }, { "clip_ratio/high_max": 0.002130532549927011, "clip_ratio/high_mean": 0.0008929371360864025, "clip_ratio/low_mean": 0.0007997854681889294, "clip_ratio/low_min": 3.7062130104459357e-05, "clip_ratio/region_mean": 0.0016927226024563424, "epoch": 1.8583090379008746, "grad_norm": 0.1214810311794281, "learning_rate": 1e-06, "loss": -0.0277, "step": 188 }, { "clip_ratio/high_max": 0.0019278093677712604, "clip_ratio/high_mean": 0.0008075537953118328, "clip_ratio/low_mean": 0.0007974877844389994, "clip_ratio/low_min": 3.335051951580681e-05, "clip_ratio/region_mean": 0.001605041565198917, "epoch": 1.8676384839650146, "grad_norm": 4.330745220184326, "learning_rate": 1e-06, "loss": -0.0052, "step": 189 }, { "clip_ratio/high_max": 0.0018567806255305186, "clip_ratio/high_mean": 0.0008792057269602083, "clip_ratio/low_mean": 0.0009020656252687331, "clip_ratio/low_min": 8.100154173007468e-05, "clip_ratio/region_mean": 0.0017812713631428778, "epoch": 1.8769679300291546, "grad_norm": 0.12339308112859726, "learning_rate": 1e-06, "loss": -0.0333, "step": 190 }, { "clip_ratio/high_max": 0.002015382604440674, "clip_ratio/high_mean": 0.0008786814833001699, "clip_ratio/low_mean": 0.0008616251943749376, "clip_ratio/low_min": 1.2063308531651273e-05, "clip_ratio/region_mean": 0.0017403066958650015, "epoch": 1.8862973760932946, "grad_norm": 0.11965405195951462, "learning_rate": 1e-06, "loss": 0.0142, "step": 191 }, { "clip_ratio/high_max": 0.002128660591552034, "clip_ratio/high_mean": 0.0008519165276084095, "clip_ratio/low_mean": 0.0009376126290590037, "clip_ratio/low_min": 9.386102738062618e-05, "clip_ratio/region_mean": 0.0017895291530294344, "epoch": 1.8956268221574344, "grad_norm": 0.1256842166185379, "learning_rate": 1e-06, "loss": 0.019, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02197265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3985.0, "completions/mean_length": 650.21923828125, "completions/mean_terminated_length": 572.8052978515625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 2.00932944606414, "grad_norm": 0.12299585342407227, "learning_rate": 1e-06, "loss": -0.0176, "num_tokens": 122170256.0, "reward": 0.588448703289032, "reward_std": 0.1866464912891388, "rewards/simpleverify_reward/mean": 0.5884486436843872, "rewards/simpleverify_reward/std": 0.492131769657135, "step": 193 }, { "clip_ratio/high_max": 0.0017969260188692715, "clip_ratio/high_mean": 0.0006619844243687112, "clip_ratio/low_mean": 0.0005378651221690234, "clip_ratio/low_min": 1.2371338016237132e-05, "clip_ratio/region_mean": 0.0011998495356237981, "epoch": 2.01865889212828, "grad_norm": 0.11642692238092422, "learning_rate": 1e-06, "loss": 0.0173, "step": 194 }, { "clip_ratio/high_max": 0.0020914624765282497, "clip_ratio/high_mean": 0.0009581762478774181, "clip_ratio/low_mean": 0.0006375525626935996, "clip_ratio/low_min": 1.5214216546155512e-05, "clip_ratio/region_mean": 0.0015957288269419223, "epoch": 2.02798833819242, "grad_norm": 0.1323326975107193, "learning_rate": 1e-06, "loss": -0.0108, "step": 195 }, { "clip_ratio/high_max": 0.0020572757930494845, "clip_ratio/high_mean": 0.0008907244355214061, "clip_ratio/low_mean": 0.0005349176026356872, "clip_ratio/low_min": 2.5984691092162393e-05, "clip_ratio/region_mean": 0.001425642036338104, "epoch": 2.03731778425656, "grad_norm": 0.13013271987438202, "learning_rate": 1e-06, "loss": -0.0212, "step": 196 }, { "clip_ratio/high_max": 0.0019667084561660886, "clip_ratio/high_mean": 0.0007802698364685057, "clip_ratio/low_mean": 0.0006356841440720018, "clip_ratio/low_min": 4.5565392611024436e-05, "clip_ratio/region_mean": 0.0014159539387037512, "epoch": 2.0466472303206995, "grad_norm": 0.11156603693962097, "learning_rate": 1e-06, "loss": 0.019, "step": 197 }, { "clip_ratio/high_max": 0.002236728854768444, "clip_ratio/high_mean": 0.0009088753213291056, "clip_ratio/low_mean": 0.0005904163044760935, "clip_ratio/low_min": 1.054496351571288e-05, "clip_ratio/region_mean": 0.001499291629443178, "epoch": 2.0559766763848395, "grad_norm": 0.12803617119789124, "learning_rate": 1e-06, "loss": -0.013, "step": 198 }, { "clip_ratio/high_max": 0.001716661936370656, "clip_ratio/high_mean": 0.000669969835144002, "clip_ratio/low_mean": 0.0007740689561615, "clip_ratio/low_min": 3.456710692262277e-05, "clip_ratio/region_mean": 0.0014440387858485337, "epoch": 2.0653061224489795, "grad_norm": 0.12137104570865631, "learning_rate": 1e-06, "loss": 0.0647, "step": 199 }, { "clip_ratio/high_max": 0.0019471766936476342, "clip_ratio/high_mean": 0.0006857923908683006, "clip_ratio/low_mean": 0.0007367390371655347, "clip_ratio/low_min": 7.230897517729318e-05, "clip_ratio/region_mean": 0.0014225313971110154, "epoch": 2.0746355685131195, "grad_norm": 0.12160708755254745, "learning_rate": 1e-06, "loss": 0.011, "step": 200 }, { "clip_ratio/high_max": 0.002050876217253972, "clip_ratio/high_mean": 0.0008745326904318063, "clip_ratio/low_mean": 0.0006334998397505842, "clip_ratio/low_min": 1.3461124581226613e-05, "clip_ratio/region_mean": 0.0015080325138114858, "epoch": 2.0839650145772595, "grad_norm": 0.1160803884267807, "learning_rate": 1e-06, "loss": -0.0033, "step": 201 }, { "clip_ratio/high_max": 0.0019217862136429176, "clip_ratio/high_mean": 0.000752816415115376, "clip_ratio/low_mean": 0.0007887291449151235, "clip_ratio/low_min": 5.5680521654721815e-05, "clip_ratio/region_mean": 0.0015415455782203935, "epoch": 2.0932944606413995, "grad_norm": 0.123390793800354, "learning_rate": 1e-06, "loss": 0.0322, "step": 202 }, { "clip_ratio/high_max": 0.002155969414161518, "clip_ratio/high_mean": 0.0009735517251101555, "clip_ratio/low_mean": 0.0007605969403812196, "clip_ratio/low_min": 2.97404494631337e-05, "clip_ratio/region_mean": 0.0017341486163786612, "epoch": 2.1026239067055394, "grad_norm": 0.12407834827899933, "learning_rate": 1e-06, "loss": -0.0402, "step": 203 }, { "clip_ratio/high_max": 0.0020341241324786097, "clip_ratio/high_mean": 0.0009022087942867074, "clip_ratio/low_mean": 0.0008137137674566475, "clip_ratio/low_min": 8.232284562836867e-05, "clip_ratio/region_mean": 0.001715922546281945, "epoch": 2.1119533527696794, "grad_norm": 0.12206974625587463, "learning_rate": 1e-06, "loss": -0.0129, "step": 204 }, { "clip_ratio/high_max": 0.002511783597583417, "clip_ratio/high_mean": 0.0009001118978630984, "clip_ratio/low_mean": 0.0008791909640422091, "clip_ratio/low_min": 5.9977593082294334e-05, "clip_ratio/region_mean": 0.0017793028673622757, "epoch": 2.1212827988338194, "grad_norm": 0.12077140808105469, "learning_rate": 1e-06, "loss": 0.0203, "step": 205 }, { "clip_ratio/high_max": 0.002241894166218117, "clip_ratio/high_mean": 0.0009401261449966114, "clip_ratio/low_mean": 0.0008823602329357527, "clip_ratio/low_min": 0.00010714153904700652, "clip_ratio/region_mean": 0.0018224864033982158, "epoch": 2.130612244897959, "grad_norm": 0.1364220827817917, "learning_rate": 1e-06, "loss": -0.0215, "step": 206 }, { "clip_ratio/high_max": 0.0023470415981137194, "clip_ratio/high_mean": 0.0009267795066989493, "clip_ratio/low_mean": 0.0009421103768545436, "clip_ratio/low_min": 3.118762469966896e-05, "clip_ratio/region_mean": 0.001868889870820567, "epoch": 2.139941690962099, "grad_norm": 0.11495406925678253, "learning_rate": 1e-06, "loss": -0.0023, "step": 207 }, { "clip_ratio/high_max": 0.0018818993739841972, "clip_ratio/high_mean": 0.0007800449839123758, "clip_ratio/low_mean": 0.0009628667066863272, "clip_ratio/low_min": 4.031467506138142e-05, "clip_ratio/region_mean": 0.0017429116705898196, "epoch": 2.149271137026239, "grad_norm": 0.115333691239357, "learning_rate": 1e-06, "loss": 0.0149, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0196010044642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 626.8428955078125, "completions/mean_terminated_length": 557.484375, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 2.158600583090379, "grad_norm": 0.127946674823761, "learning_rate": 1e-06, "loss": 0.0546, "num_tokens": 131399963.0, "reward": 0.5862165689468384, "reward_std": 0.18896526098251343, "rewards/simpleverify_reward/mean": 0.5862165093421936, "rewards/simpleverify_reward/std": 0.4925277829170227, "step": 209 }, { "clip_ratio/high_max": 0.0018405082628305536, "clip_ratio/high_mean": 0.0007519934479205403, "clip_ratio/low_mean": 0.0006156871368148131, "clip_ratio/low_min": 5.5476961279055104e-05, "clip_ratio/region_mean": 0.0013676805792783853, "epoch": 2.167930029154519, "grad_norm": 0.12491989880800247, "learning_rate": 1e-06, "loss": 0.0111, "step": 210 }, { "clip_ratio/high_max": 0.0017947599226317834, "clip_ratio/high_mean": 0.0007880264765844913, "clip_ratio/low_mean": 0.0005980106316201272, "clip_ratio/low_min": 3.5438371924101375e-05, "clip_ratio/region_mean": 0.0013860371327609755, "epoch": 2.177259475218659, "grad_norm": 0.1508055329322815, "learning_rate": 1e-06, "loss": -0.0028, "step": 211 }, { "clip_ratio/high_max": 0.002310191615833901, "clip_ratio/high_mean": 0.0009722374124976341, "clip_ratio/low_mean": 0.0005404180737969, "clip_ratio/low_min": 3.940731403417885e-05, "clip_ratio/region_mean": 0.001512655504484428, "epoch": 2.186588921282799, "grad_norm": 0.13316217064857483, "learning_rate": 1e-06, "loss": -0.0287, "step": 212 }, { "clip_ratio/high_max": 0.0020843147904088255, "clip_ratio/high_mean": 0.0008720773694221862, "clip_ratio/low_mean": 0.0005388137651607394, "clip_ratio/low_min": 1.9635563148767687e-05, "clip_ratio/region_mean": 0.001410891127306968, "epoch": 2.195918367346939, "grad_norm": 0.12384217977523804, "learning_rate": 1e-06, "loss": -0.0138, "step": 213 }, { "clip_ratio/high_max": 0.0017938973505806644, "clip_ratio/high_mean": 0.0008469766471534967, "clip_ratio/low_mean": 0.0006467679031629814, "clip_ratio/low_min": 4.03911008106661e-05, "clip_ratio/region_mean": 0.00149374454485951, "epoch": 2.205247813411079, "grad_norm": 0.1277834177017212, "learning_rate": 1e-06, "loss": 0.0082, "step": 214 }, { "clip_ratio/high_max": 0.0019464127872197423, "clip_ratio/high_mean": 0.0007165121387515683, "clip_ratio/low_mean": 0.0006216614783625118, "clip_ratio/low_min": 3.8723668694728985e-05, "clip_ratio/region_mean": 0.0013381736062001437, "epoch": 2.2145772594752184, "grad_norm": 0.1328658014535904, "learning_rate": 1e-06, "loss": 0.022, "step": 215 }, { "clip_ratio/high_max": 0.002124794751580339, "clip_ratio/high_mean": 0.0009609972330508754, "clip_ratio/low_mean": 0.0006012734647811158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015622706778231077, "epoch": 2.2239067055393584, "grad_norm": 0.120405413210392, "learning_rate": 1e-06, "loss": 0.0092, "step": 216 }, { "clip_ratio/high_max": 0.0021815121799590997, "clip_ratio/high_mean": 0.0008828935697238194, "clip_ratio/low_mean": 0.0008162455524143297, "clip_ratio/low_min": 1.3522284461942036e-05, "clip_ratio/region_mean": 0.001699139116681181, "epoch": 2.2332361516034984, "grad_norm": 0.1251181960105896, "learning_rate": 1e-06, "loss": 0.0081, "step": 217 }, { "clip_ratio/high_max": 0.001965037801710423, "clip_ratio/high_mean": 0.0008123564603010891, "clip_ratio/low_mean": 0.0008614639391453238, "clip_ratio/low_min": 4.049728249810869e-05, "clip_ratio/region_mean": 0.001673820399446413, "epoch": 2.2425655976676384, "grad_norm": 0.1266188621520996, "learning_rate": 1e-06, "loss": 0.0161, "step": 218 }, { "clip_ratio/high_max": 0.0019518774643074721, "clip_ratio/high_mean": 0.0008156351941579487, "clip_ratio/low_mean": 0.0008107530356937787, "clip_ratio/low_min": 3.765344081330113e-05, "clip_ratio/region_mean": 0.0016263882025668863, "epoch": 2.2518950437317784, "grad_norm": 0.13375331461429596, "learning_rate": 1e-06, "loss": -0.0079, "step": 219 }, { "clip_ratio/high_max": 0.0020231983871781267, "clip_ratio/high_mean": 0.0008665814930282068, "clip_ratio/low_mean": 0.000828351830932661, "clip_ratio/low_min": 4.753373195853783e-05, "clip_ratio/region_mean": 0.0016949333585216664, "epoch": 2.2612244897959184, "grad_norm": 0.12112519145011902, "learning_rate": 1e-06, "loss": 0.043, "step": 220 }, { "clip_ratio/high_max": 0.0022567470732610673, "clip_ratio/high_mean": 0.0009100745446630754, "clip_ratio/low_mean": 0.0008545086293452187, "clip_ratio/low_min": 4.8257143134833314e-05, "clip_ratio/region_mean": 0.0017645831976551563, "epoch": 2.2705539358600584, "grad_norm": 0.13155855238437653, "learning_rate": 1e-06, "loss": 0.0128, "step": 221 }, { "clip_ratio/high_max": 0.002188900441979058, "clip_ratio/high_mean": 0.0009390644299855921, "clip_ratio/low_mean": 0.0008135513216984691, "clip_ratio/low_min": 1.0272847248415928e-05, "clip_ratio/region_mean": 0.0017526157389511354, "epoch": 2.2798833819241984, "grad_norm": 0.12157576531171799, "learning_rate": 1e-06, "loss": -0.0341, "step": 222 }, { "clip_ratio/high_max": 0.002376636417466216, "clip_ratio/high_mean": 0.0009413403204234783, "clip_ratio/low_mean": 0.0008429393146798247, "clip_ratio/low_min": 1.4568764527211897e-05, "clip_ratio/region_mean": 0.00178427964419825, "epoch": 2.2892128279883384, "grad_norm": 0.12409801781177521, "learning_rate": 1e-06, "loss": -0.0428, "step": 223 }, { "clip_ratio/high_max": 0.0020486188732320443, "clip_ratio/high_mean": 0.0008829443650029134, "clip_ratio/low_mean": 0.0010263558942824602, "clip_ratio/low_min": 9.512057295069098e-05, "clip_ratio/region_mean": 0.0019093002847512253, "epoch": 2.298542274052478, "grad_norm": 0.1301286518573761, "learning_rate": 1e-06, "loss": 0.0344, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019182477678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4020.0, "completions/mean_length": 625.9957885742188, "completions/mean_terminated_length": 558.130615234375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.307871720116618, "grad_norm": 0.14131343364715576, "learning_rate": 1e-06, "loss": 0.0423, "num_tokens": 140672438.0, "reward": 0.5915178656578064, "reward_std": 0.1912863552570343, "rewards/simpleverify_reward/mean": 0.5915178656578064, "rewards/simpleverify_reward/std": 0.49157029390335083, "step": 225 }, { "clip_ratio/high_max": 0.0020893663677270524, "clip_ratio/high_mean": 0.0008944059227360412, "clip_ratio/low_mean": 0.0005063339667685796, "clip_ratio/low_min": 1.215953307109885e-05, "clip_ratio/region_mean": 0.001400739849486854, "epoch": 2.317201166180758, "grad_norm": 0.12987080216407776, "learning_rate": 1e-06, "loss": 0.0168, "step": 226 }, { "clip_ratio/high_max": 0.001944585415913025, "clip_ratio/high_mean": 0.0008411312264797743, "clip_ratio/low_mean": 0.0006431487836380256, "clip_ratio/low_min": 5.300941938912729e-05, "clip_ratio/region_mean": 0.001484280001022853, "epoch": 2.326530612244898, "grad_norm": 0.12687213718891144, "learning_rate": 1e-06, "loss": 0.0162, "step": 227 }, { "clip_ratio/high_max": 0.002219893955043517, "clip_ratio/high_mean": 0.0009019402314152103, "clip_ratio/low_mean": 0.0004488765343921841, "clip_ratio/low_min": 1.4962892237235792e-05, "clip_ratio/region_mean": 0.0013508167539839633, "epoch": 2.335860058309038, "grad_norm": 0.11925653368234634, "learning_rate": 1e-06, "loss": -0.0343, "step": 228 }, { "clip_ratio/high_max": 0.002015854413912166, "clip_ratio/high_mean": 0.0008141142880049301, "clip_ratio/low_mean": 0.0005592276729657897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013733419946220238, "epoch": 2.345189504373178, "grad_norm": 0.11106139421463013, "learning_rate": 1e-06, "loss": 0.0036, "step": 229 }, { "clip_ratio/high_max": 0.001684079641563585, "clip_ratio/high_mean": 0.0006887829113111366, "clip_ratio/low_mean": 0.0006011958757881075, "clip_ratio/low_min": 8.55314629006898e-05, "clip_ratio/region_mean": 0.0012899787798232865, "epoch": 2.354518950437318, "grad_norm": 0.10471905767917633, "learning_rate": 1e-06, "loss": 0.0291, "step": 230 }, { "clip_ratio/high_max": 0.0019000477259396575, "clip_ratio/high_mean": 0.0007887675983511144, "clip_ratio/low_mean": 0.0006899407708260696, "clip_ratio/low_min": 4.5019622120889835e-05, "clip_ratio/region_mean": 0.00147870838191011, "epoch": 2.363848396501458, "grad_norm": 0.13960635662078857, "learning_rate": 1e-06, "loss": 0.0286, "step": 231 }, { "clip_ratio/high_max": 0.002159243569622049, "clip_ratio/high_mean": 0.0008731465077289613, "clip_ratio/low_mean": 0.000738332950277254, "clip_ratio/low_min": 4.7786147661099676e-05, "clip_ratio/region_mean": 0.0016114794379973318, "epoch": 2.373177842565598, "grad_norm": 0.12925006449222565, "learning_rate": 1e-06, "loss": 0.0258, "step": 232 }, { "clip_ratio/high_max": 0.0019904570799553767, "clip_ratio/high_mean": 0.0008516305388184264, "clip_ratio/low_mean": 0.0007260337260959204, "clip_ratio/low_min": 6.974128336878493e-05, "clip_ratio/region_mean": 0.0015776642394484952, "epoch": 2.3825072886297374, "grad_norm": 0.1304120272397995, "learning_rate": 1e-06, "loss": 0.0157, "step": 233 }, { "clip_ratio/high_max": 0.002088487304718001, "clip_ratio/high_mean": 0.0007963547996041598, "clip_ratio/low_mean": 0.00073287453778903, "clip_ratio/low_min": 6.036271406628657e-05, "clip_ratio/region_mean": 0.0015292293137463275, "epoch": 2.3918367346938774, "grad_norm": 0.13490985333919525, "learning_rate": 1e-06, "loss": 0.0144, "step": 234 }, { "clip_ratio/high_max": 0.0021506539269466884, "clip_ratio/high_mean": 0.0008582061964261811, "clip_ratio/low_mean": 0.0007422045273415279, "clip_ratio/low_min": 1.681011235632468e-05, "clip_ratio/region_mean": 0.0016004107346816454, "epoch": 2.4011661807580174, "grad_norm": 0.11899039894342422, "learning_rate": 1e-06, "loss": -0.0068, "step": 235 }, { "clip_ratio/high_max": 0.0023306936273002066, "clip_ratio/high_mean": 0.0010644594731274992, "clip_ratio/low_mean": 0.0008131937574944459, "clip_ratio/low_min": 4.140899909543805e-05, "clip_ratio/region_mean": 0.0018776532160700299, "epoch": 2.4104956268221573, "grad_norm": 0.14129850268363953, "learning_rate": 1e-06, "loss": -0.026, "step": 236 }, { "clip_ratio/high_max": 0.0020028894650749862, "clip_ratio/high_mean": 0.0008807071035334957, "clip_ratio/low_mean": 0.0008234550132328877, "clip_ratio/low_min": 8.710111342224991e-05, "clip_ratio/region_mean": 0.001704162117675878, "epoch": 2.4198250728862973, "grad_norm": 0.1280832141637802, "learning_rate": 1e-06, "loss": -0.0076, "step": 237 }, { "clip_ratio/high_max": 0.0020273738118703477, "clip_ratio/high_mean": 0.000878430577358813, "clip_ratio/low_mean": 0.0006784553224861156, "clip_ratio/low_min": 1.3182872862671502e-05, "clip_ratio/region_mean": 0.0015568859234917909, "epoch": 2.4291545189504373, "grad_norm": 0.12247508019208908, "learning_rate": 1e-06, "loss": -0.0283, "step": 238 }, { "clip_ratio/high_max": 0.0019773109088419005, "clip_ratio/high_mean": 0.000901135021194932, "clip_ratio/low_mean": 0.0009124619500653353, "clip_ratio/low_min": 2.9308323064469732e-05, "clip_ratio/region_mean": 0.0018135969803552143, "epoch": 2.4384839650145773, "grad_norm": 0.12397079914808273, "learning_rate": 1e-06, "loss": 0.0136, "step": 239 }, { "clip_ratio/high_max": 0.00245932859252207, "clip_ratio/high_mean": 0.0011219963307667058, "clip_ratio/low_mean": 0.0008688222114869859, "clip_ratio/low_min": 8.091663949016947e-05, "clip_ratio/region_mean": 0.0019908185458916705, "epoch": 2.4478134110787173, "grad_norm": 0.1418963074684143, "learning_rate": 1e-06, "loss": -0.0252, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021275111607142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4089.0, "completions/mean_length": 646.0219116210938, "completions/mean_terminated_length": 571.0277099609375, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 2.4571428571428573, "grad_norm": 0.1328268051147461, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 150136368.0, "reward": 0.5865653157234192, "reward_std": 0.18658261001110077, "rewards/simpleverify_reward/mean": 0.5865653157234192, "rewards/simpleverify_reward/std": 0.49246662855148315, "step": 241 }, { "clip_ratio/high_max": 0.0016963839734671637, "clip_ratio/high_mean": 0.000653330776913208, "clip_ratio/low_mean": 0.0005410582825788879, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011943890785914846, "epoch": 2.466472303206997, "grad_norm": 0.10782972723245621, "learning_rate": 1e-06, "loss": 0.038, "step": 242 }, { "clip_ratio/high_max": 0.0018493935349397361, "clip_ratio/high_mean": 0.0007632574506715173, "clip_ratio/low_mean": 0.0005517133176908828, "clip_ratio/low_min": 3.626738271123031e-05, "clip_ratio/region_mean": 0.001314970762905432, "epoch": 2.4758017492711373, "grad_norm": 0.13881301879882812, "learning_rate": 1e-06, "loss": 0.0216, "step": 243 }, { "clip_ratio/high_max": 0.001899774568300927, "clip_ratio/high_mean": 0.0008786041598796146, "clip_ratio/low_mean": 0.0005790085706394166, "clip_ratio/low_min": 1.4845605619484559e-05, "clip_ratio/region_mean": 0.0014576127287000418, "epoch": 2.485131195335277, "grad_norm": 0.1238367035984993, "learning_rate": 1e-06, "loss": -0.0137, "step": 244 }, { "clip_ratio/high_max": 0.002055424549325835, "clip_ratio/high_mean": 0.0007818924659659388, "clip_ratio/low_mean": 0.0005361020912459935, "clip_ratio/low_min": 1.1604159226408228e-05, "clip_ratio/region_mean": 0.0013179945744923316, "epoch": 2.494460641399417, "grad_norm": 0.1407696157693863, "learning_rate": 1e-06, "loss": -0.0205, "step": 245 }, { "clip_ratio/high_max": 0.0017845065449364483, "clip_ratio/high_mean": 0.0008011355002963683, "clip_ratio/low_mean": 0.0006124816409283085, "clip_ratio/low_min": 3.302741424704436e-05, "clip_ratio/region_mean": 0.0014136171266727615, "epoch": 2.503790087463557, "grad_norm": 0.13378280401229858, "learning_rate": 1e-06, "loss": 0.0237, "step": 246 }, { "clip_ratio/high_max": 0.0019432942535786424, "clip_ratio/high_mean": 0.0007908724255685229, "clip_ratio/low_mean": 0.0006734944618074223, "clip_ratio/low_min": 4.794352389581036e-05, "clip_ratio/region_mean": 0.0014643668982898816, "epoch": 2.513119533527697, "grad_norm": 0.12905092537403107, "learning_rate": 1e-06, "loss": -0.0487, "step": 247 }, { "clip_ratio/high_max": 0.0017310272814938799, "clip_ratio/high_mean": 0.0007167013482103357, "clip_ratio/low_mean": 0.0007129334462661063, "clip_ratio/low_min": 0.00010517258488107473, "clip_ratio/region_mean": 0.0014296348090283573, "epoch": 2.522448979591837, "grad_norm": 0.12245016545057297, "learning_rate": 1e-06, "loss": 0.0449, "step": 248 }, { "clip_ratio/high_max": 0.002147627223166637, "clip_ratio/high_mean": 0.000824724484118633, "clip_ratio/low_mean": 0.0008833127139951102, "clip_ratio/low_min": 6.68258908262942e-05, "clip_ratio/region_mean": 0.001708037241769489, "epoch": 2.5317784256559768, "grad_norm": 0.1419130116701126, "learning_rate": 1e-06, "loss": 0.0504, "step": 249 }, { "clip_ratio/high_max": 0.0021055199904367328, "clip_ratio/high_mean": 0.0008955449757195311, "clip_ratio/low_mean": 0.0007742406469333218, "clip_ratio/low_min": 2.5578027816663962e-05, "clip_ratio/region_mean": 0.0016697856153768953, "epoch": 2.5411078717201168, "grad_norm": 0.12226957827806473, "learning_rate": 1e-06, "loss": -0.0228, "step": 250 }, { "clip_ratio/high_max": 0.0019203076080884784, "clip_ratio/high_mean": 0.0008266956265288172, "clip_ratio/low_mean": 0.0008018473417905625, "clip_ratio/low_min": 8.694459938851651e-05, "clip_ratio/region_mean": 0.001628542973776348, "epoch": 2.5504373177842563, "grad_norm": 0.11424940079450607, "learning_rate": 1e-06, "loss": -0.0136, "step": 251 }, { "clip_ratio/high_max": 0.0020320543699199334, "clip_ratio/high_mean": 0.0008175016519089695, "clip_ratio/low_mean": 0.0008323727306560613, "clip_ratio/low_min": 1.4664477021142375e-05, "clip_ratio/region_mean": 0.0016498743134434335, "epoch": 2.5597667638483967, "grad_norm": 0.13212308287620544, "learning_rate": 1e-06, "loss": -0.0032, "step": 252 }, { "clip_ratio/high_max": 0.00203395755852398, "clip_ratio/high_mean": 0.0008580097878621018, "clip_ratio/low_mean": 0.0009081909702217672, "clip_ratio/low_min": 3.933630523533793e-05, "clip_ratio/region_mean": 0.0017662007740000263, "epoch": 2.5690962099125363, "grad_norm": 0.12644504010677338, "learning_rate": 1e-06, "loss": -0.0096, "step": 253 }, { "clip_ratio/high_max": 0.0020600497809937224, "clip_ratio/high_mean": 0.0008304304583361954, "clip_ratio/low_mean": 0.0010192494264629204, "clip_ratio/low_min": 2.925382250396069e-05, "clip_ratio/region_mean": 0.0018496798948035575, "epoch": 2.5784256559766763, "grad_norm": 0.12922652065753937, "learning_rate": 1e-06, "loss": -0.0021, "step": 254 }, { "clip_ratio/high_max": 0.0018994899146491662, "clip_ratio/high_mean": 0.0008104877979349112, "clip_ratio/low_mean": 0.000960200568442815, "clip_ratio/low_min": 3.165467660437571e-05, "clip_ratio/region_mean": 0.0017706883736536838, "epoch": 2.5877551020408163, "grad_norm": 0.11821028590202332, "learning_rate": 1e-06, "loss": 0.0086, "step": 255 }, { "clip_ratio/high_max": 0.0022747216426068917, "clip_ratio/high_mean": 0.0009415554468432674, "clip_ratio/low_mean": 0.0007336103681154782, "clip_ratio/low_min": 7.640586773050018e-06, "clip_ratio/region_mean": 0.0016751658367866185, "epoch": 2.5970845481049563, "grad_norm": 0.1256120651960373, "learning_rate": 1e-06, "loss": -0.0335, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.021275111607142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 644.238525390625, "completions/mean_terminated_length": 569.2055053710938, "completions/min_length": 65.0, "completions/min_terminated_length": 65.0, "epoch": 2.6064139941690962, "grad_norm": 0.1304124891757965, "learning_rate": 1e-06, "loss": -0.0376, "num_tokens": 159549835.0, "reward": 0.59033203125, "reward_std": 0.19320522248744965, "rewards/simpleverify_reward/mean": 0.59033203125, "rewards/simpleverify_reward/std": 0.4917895793914795, "step": 257 }, { "clip_ratio/high_max": 0.0018062403760268353, "clip_ratio/high_mean": 0.0008192590503313113, "clip_ratio/low_mean": 0.0006084460856072837, "clip_ratio/low_min": 4.1014804992300924e-05, "clip_ratio/region_mean": 0.0014277051232056692, "epoch": 2.6157434402332362, "grad_norm": 0.12776902318000793, "learning_rate": 1e-06, "loss": 0.0167, "step": 258 }, { "clip_ratio/high_max": 0.0017936293734237552, "clip_ratio/high_mean": 0.0007644012166565517, "clip_ratio/low_mean": 0.0006231336810742505, "clip_ratio/low_min": 1.240325491380645e-05, "clip_ratio/region_mean": 0.00138753487408394, "epoch": 2.6250728862973762, "grad_norm": 0.14050258696079254, "learning_rate": 1e-06, "loss": 0.0202, "step": 259 }, { "clip_ratio/high_max": 0.001799909870896954, "clip_ratio/high_mean": 0.0007734462451480795, "clip_ratio/low_mean": 0.0005308423369569937, "clip_ratio/low_min": 6.151170509838266e-05, "clip_ratio/region_mean": 0.0013042885984759778, "epoch": 2.6344023323615158, "grad_norm": 0.12463710457086563, "learning_rate": 1e-06, "loss": 0.0185, "step": 260 }, { "clip_ratio/high_max": 0.0020928424564772286, "clip_ratio/high_mean": 0.0007757941311865579, "clip_ratio/low_mean": 0.000672125108394539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014479192541330121, "epoch": 2.643731778425656, "grad_norm": 0.1476905792951584, "learning_rate": 1e-06, "loss": 0.0295, "step": 261 }, { "clip_ratio/high_max": 0.0017743981916282792, "clip_ratio/high_mean": 0.0006948119062144542, "clip_ratio/low_mean": 0.0005997330063109985, "clip_ratio/low_min": 3.0183517992554698e-05, "clip_ratio/region_mean": 0.0012945449016115163, "epoch": 2.6530612244897958, "grad_norm": 0.12387193739414215, "learning_rate": 1e-06, "loss": 0.0232, "step": 262 }, { "clip_ratio/high_max": 0.0021343884254747536, "clip_ratio/high_mean": 0.0008853844447003212, "clip_ratio/low_mean": 0.0007057667135086376, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015911511654849164, "epoch": 2.6623906705539357, "grad_norm": 0.1529926061630249, "learning_rate": 1e-06, "loss": -0.0181, "step": 263 }, { "clip_ratio/high_max": 0.0021504866308532655, "clip_ratio/high_mean": 0.0009263995179935591, "clip_ratio/low_mean": 0.0007335137270274572, "clip_ratio/low_min": 3.746753372979583e-05, "clip_ratio/region_mean": 0.0016599131922703236, "epoch": 2.6717201166180757, "grad_norm": 0.13446612656116486, "learning_rate": 1e-06, "loss": -0.003, "step": 264 }, { "clip_ratio/high_max": 0.0018246835061290767, "clip_ratio/high_mean": 0.0006965208340261597, "clip_ratio/low_mean": 0.0007292516884263023, "clip_ratio/low_min": 8.711582086107228e-05, "clip_ratio/region_mean": 0.001425772537913872, "epoch": 2.6810495626822157, "grad_norm": 0.1314975917339325, "learning_rate": 1e-06, "loss": 0.0241, "step": 265 }, { "clip_ratio/high_max": 0.001823956783482572, "clip_ratio/high_mean": 0.00071158493119583, "clip_ratio/low_mean": 0.0008301709422084969, "clip_ratio/low_min": 1.6237983800238e-05, "clip_ratio/region_mean": 0.001541755860671401, "epoch": 2.6903790087463557, "grad_norm": 0.12952229380607605, "learning_rate": 1e-06, "loss": 0.0116, "step": 266 }, { "clip_ratio/high_max": 0.0021106850035721436, "clip_ratio/high_mean": 0.0008754036534810439, "clip_ratio/low_mean": 0.0007911225438874681, "clip_ratio/low_min": 2.5621274289733265e-05, "clip_ratio/region_mean": 0.001666526215558406, "epoch": 2.6997084548104957, "grad_norm": 0.12006521970033646, "learning_rate": 1e-06, "loss": 0.006, "step": 267 }, { "clip_ratio/high_max": 0.002235565392766148, "clip_ratio/high_mean": 0.0008764523045101669, "clip_ratio/low_mean": 0.0009528008777124342, "clip_ratio/low_min": 7.211607771751005e-05, "clip_ratio/region_mean": 0.0018292531894985586, "epoch": 2.7090379008746357, "grad_norm": 0.13169372081756592, "learning_rate": 1e-06, "loss": 0.012, "step": 268 }, { "clip_ratio/high_max": 0.0021535555461014155, "clip_ratio/high_mean": 0.0009062049348358414, "clip_ratio/low_mean": 0.000739204037017771, "clip_ratio/low_min": 3.741768159670755e-05, "clip_ratio/region_mean": 0.0016454089636681601, "epoch": 2.7183673469387752, "grad_norm": 0.12983812391757965, "learning_rate": 1e-06, "loss": 0.0036, "step": 269 }, { "clip_ratio/high_max": 0.0017459976406826172, "clip_ratio/high_mean": 0.0007967498950165464, "clip_ratio/low_mean": 0.0009587883523636265, "clip_ratio/low_min": 4.3287151129334234e-05, "clip_ratio/region_mean": 0.0017555382510181516, "epoch": 2.7276967930029157, "grad_norm": 0.12162166088819504, "learning_rate": 1e-06, "loss": 0.0118, "step": 270 }, { "clip_ratio/high_max": 0.002034960671153385, "clip_ratio/high_mean": 0.0007998027249414008, "clip_ratio/low_mean": 0.000780499622123898, "clip_ratio/low_min": 3.476084384601563e-05, "clip_ratio/region_mean": 0.001580302334332373, "epoch": 2.7370262390670552, "grad_norm": 0.1412871927022934, "learning_rate": 1e-06, "loss": -0.044, "step": 271 }, { "clip_ratio/high_max": 0.0019165245321346447, "clip_ratio/high_mean": 0.0008439558205282083, "clip_ratio/low_mean": 0.0008916646002035122, "clip_ratio/low_min": 2.3156724637374282e-05, "clip_ratio/region_mean": 0.0017356204116367735, "epoch": 2.746355685131195, "grad_norm": 0.1176559329032898, "learning_rate": 1e-06, "loss": 0.0167, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02099609375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 644.6810302734375, "completions/mean_terminated_length": 570.6627197265625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 2.755685131195335, "grad_norm": 0.1348816156387329, "learning_rate": 1e-06, "loss": 0.0238, "num_tokens": 168956486.0, "reward": 0.5862863063812256, "reward_std": 0.18844912946224213, "rewards/simpleverify_reward/mean": 0.5862862467765808, "rewards/simpleverify_reward/std": 0.49251556396484375, "step": 273 }, { "clip_ratio/high_max": 0.002090372930979356, "clip_ratio/high_mean": 0.0008250843948189868, "clip_ratio/low_mean": 0.0005652272529914626, "clip_ratio/low_min": 5.174675607122481e-05, "clip_ratio/region_mean": 0.001390311648719944, "epoch": 2.765014577259475, "grad_norm": 0.13565286993980408, "learning_rate": 1e-06, "loss": 0.017, "step": 274 }, { "clip_ratio/high_max": 0.001964692895853659, "clip_ratio/high_mean": 0.0007914844190963777, "clip_ratio/low_mean": 0.0005804553975394811, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013719398230023216, "epoch": 2.774344023323615, "grad_norm": 0.12759877741336823, "learning_rate": 1e-06, "loss": 0.0323, "step": 275 }, { "clip_ratio/high_max": 0.0017323568499705289, "clip_ratio/high_mean": 0.0007510256491514156, "clip_ratio/low_mean": 0.0006105838929215679, "clip_ratio/low_min": 8.988925401354209e-06, "clip_ratio/region_mean": 0.0013616095093311742, "epoch": 2.783673469387755, "grad_norm": 0.12866930663585663, "learning_rate": 1e-06, "loss": 0.0162, "step": 276 }, { "clip_ratio/high_max": 0.0020385889838507865, "clip_ratio/high_mean": 0.0008619709460617742, "clip_ratio/low_mean": 0.0005276306510495488, "clip_ratio/low_min": 2.680677607713733e-05, "clip_ratio/region_mean": 0.0013896016134822275, "epoch": 2.793002915451895, "grad_norm": 0.13702957332134247, "learning_rate": 1e-06, "loss": -0.0057, "step": 277 }, { "clip_ratio/high_max": 0.001990117012610426, "clip_ratio/high_mean": 0.0008273708263004664, "clip_ratio/low_mean": 0.0006158271480671829, "clip_ratio/low_min": 1.567594699736219e-05, "clip_ratio/region_mean": 0.0014431979470828082, "epoch": 2.8023323615160347, "grad_norm": 0.12204594910144806, "learning_rate": 1e-06, "loss": 0.0192, "step": 278 }, { "clip_ratio/high_max": 0.0019334724711370654, "clip_ratio/high_mean": 0.0008531080165994354, "clip_ratio/low_mean": 0.0007378535847237799, "clip_ratio/low_min": 4.6744961764488835e-05, "clip_ratio/region_mean": 0.0015909615976852365, "epoch": 2.811661807580175, "grad_norm": 0.12626740336418152, "learning_rate": 1e-06, "loss": -0.0011, "step": 279 }, { "clip_ratio/high_max": 0.0019001436921826098, "clip_ratio/high_mean": 0.0007994910738489125, "clip_ratio/low_mean": 0.0006251769964364939, "clip_ratio/low_min": 3.790584742091596e-05, "clip_ratio/region_mean": 0.0014246680766518693, "epoch": 2.8209912536443147, "grad_norm": 0.15935376286506653, "learning_rate": 1e-06, "loss": 0.0292, "step": 280 }, { "clip_ratio/high_max": 0.0021244433883111924, "clip_ratio/high_mean": 0.0009392931278853212, "clip_ratio/low_mean": 0.0007062049953674432, "clip_ratio/low_min": 5.674222393281525e-05, "clip_ratio/region_mean": 0.001645498148718616, "epoch": 2.8303206997084547, "grad_norm": 0.13014677166938782, "learning_rate": 1e-06, "loss": -0.0251, "step": 281 }, { "clip_ratio/high_max": 0.0018315842680749483, "clip_ratio/high_mean": 0.0008626417329651304, "clip_ratio/low_mean": 0.0007320784388866741, "clip_ratio/low_min": 3.2835613637871575e-05, "clip_ratio/region_mean": 0.0015947201463859528, "epoch": 2.8396501457725947, "grad_norm": 0.11823304742574692, "learning_rate": 1e-06, "loss": -0.0007, "step": 282 }, { "clip_ratio/high_max": 0.0020985955707146786, "clip_ratio/high_mean": 0.0008642544362373883, "clip_ratio/low_mean": 0.000750296992919175, "clip_ratio/low_min": 6.234263673832174e-05, "clip_ratio/region_mean": 0.0016145514309755526, "epoch": 2.8489795918367347, "grad_norm": 0.12496565282344818, "learning_rate": 1e-06, "loss": 0.0114, "step": 283 }, { "clip_ratio/high_max": 0.0020798798941541463, "clip_ratio/high_mean": 0.0009208399496856146, "clip_ratio/low_mean": 0.0007529819777118973, "clip_ratio/low_min": 1.4991604984970763e-05, "clip_ratio/region_mean": 0.0016738219201215543, "epoch": 2.8583090379008746, "grad_norm": 0.13007360696792603, "learning_rate": 1e-06, "loss": -0.0289, "step": 284 }, { "clip_ratio/high_max": 0.0017058213124983013, "clip_ratio/high_mean": 0.0008043526104302146, "clip_ratio/low_mean": 0.0007588454736833228, "clip_ratio/low_min": 5.0952465244336054e-05, "clip_ratio/region_mean": 0.0015631980859325267, "epoch": 2.8676384839650146, "grad_norm": 0.12688350677490234, "learning_rate": 1e-06, "loss": -0.0126, "step": 285 }, { "clip_ratio/high_max": 0.002237947497633286, "clip_ratio/high_mean": 0.0010190133943979163, "clip_ratio/low_mean": 0.0007530581024184357, "clip_ratio/low_min": 4.439351505425293e-05, "clip_ratio/region_mean": 0.0017720715040923096, "epoch": 2.8769679300291546, "grad_norm": 0.13257132470607758, "learning_rate": 1e-06, "loss": -0.0314, "step": 286 }, { "clip_ratio/high_max": 0.0020596675676642917, "clip_ratio/high_mean": 0.000908176716620801, "clip_ratio/low_mean": 0.0007357789490924915, "clip_ratio/low_min": 2.6835552489501424e-05, "clip_ratio/region_mean": 0.0016439556566183455, "epoch": 2.8862973760932946, "grad_norm": 0.13302135467529297, "learning_rate": 1e-06, "loss": -0.0072, "step": 287 }, { "clip_ratio/high_max": 0.002004908961680485, "clip_ratio/high_mean": 0.0007842058166716015, "clip_ratio/low_mean": 0.0008659140330564696, "clip_ratio/low_min": 6.274481165746693e-05, "clip_ratio/region_mean": 0.0016501198479090817, "epoch": 2.8956268221574346, "grad_norm": 0.13508276641368866, "learning_rate": 1e-06, "loss": 0.0351, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025041852678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 654.7391967773438, "completions/mean_terminated_length": 566.3502197265625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 3.00932944606414, "grad_norm": 0.13060501217842102, "learning_rate": 1e-06, "loss": -0.0326, "num_tokens": 178306451.0, "reward": 0.6003767251968384, "reward_std": 0.17944417893886566, "rewards/simpleverify_reward/mean": 0.6003766655921936, "rewards/simpleverify_reward/std": 0.48983800411224365, "step": 289 }, { "clip_ratio/high_max": 0.0015473846251552459, "clip_ratio/high_mean": 0.0006803231772209983, "clip_ratio/low_mean": 0.0005371813640522305, "clip_ratio/low_min": 1.7289074094151147e-05, "clip_ratio/region_mean": 0.0012175045667390805, "epoch": 3.01865889212828, "grad_norm": 0.12463854253292084, "learning_rate": 1e-06, "loss": 0.0137, "step": 290 }, { "clip_ratio/high_max": 0.0018839495241991244, "clip_ratio/high_mean": 0.0007413483381242258, "clip_ratio/low_mean": 0.000613599176176649, "clip_ratio/low_min": 2.880552983697271e-05, "clip_ratio/region_mean": 0.001354947489744518, "epoch": 3.02798833819242, "grad_norm": 0.11187395453453064, "learning_rate": 1e-06, "loss": -0.003, "step": 291 }, { "clip_ratio/high_max": 0.0017556358397996519, "clip_ratio/high_mean": 0.0006978545843594475, "clip_ratio/low_mean": 0.00045630516615347005, "clip_ratio/low_min": 1.4813937013968825e-05, "clip_ratio/region_mean": 0.001154159730504034, "epoch": 3.03731778425656, "grad_norm": 0.12508447468280792, "learning_rate": 1e-06, "loss": -0.0271, "step": 292 }, { "clip_ratio/high_max": 0.0019594388322730083, "clip_ratio/high_mean": 0.0007714561270404374, "clip_ratio/low_mean": 0.0005074566615803633, "clip_ratio/low_min": 8.626639100839384e-06, "clip_ratio/region_mean": 0.0012789128086296842, "epoch": 3.0466472303206995, "grad_norm": 0.11480789631605148, "learning_rate": 1e-06, "loss": -0.0052, "step": 293 }, { "clip_ratio/high_max": 0.001886590111098485, "clip_ratio/high_mean": 0.0007755546848784434, "clip_ratio/low_mean": 0.0005511971212399658, "clip_ratio/low_min": 2.4461840439471416e-05, "clip_ratio/region_mean": 0.001326751800661441, "epoch": 3.0559766763848395, "grad_norm": 0.12108650803565979, "learning_rate": 1e-06, "loss": -0.0094, "step": 294 }, { "clip_ratio/high_max": 0.0021480107607203536, "clip_ratio/high_mean": 0.0009596726195013616, "clip_ratio/low_mean": 0.0005933992151767598, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015530718374066055, "epoch": 3.0653061224489795, "grad_norm": 0.131123885512352, "learning_rate": 1e-06, "loss": -0.0376, "step": 295 }, { "clip_ratio/high_max": 0.0019967166444985196, "clip_ratio/high_mean": 0.0007857234013499692, "clip_ratio/low_mean": 0.0005732056633860338, "clip_ratio/low_min": 2.508025681891013e-05, "clip_ratio/region_mean": 0.0013589290683739819, "epoch": 3.0746355685131195, "grad_norm": 0.12411342561244965, "learning_rate": 1e-06, "loss": -0.002, "step": 296 }, { "clip_ratio/high_max": 0.0021143675185157917, "clip_ratio/high_mean": 0.0008454385479126358, "clip_ratio/low_mean": 0.000666111252940027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015115497953956947, "epoch": 3.0839650145772595, "grad_norm": 0.13811255991458893, "learning_rate": 1e-06, "loss": -0.003, "step": 297 }, { "clip_ratio/high_max": 0.002002229135541711, "clip_ratio/high_mean": 0.0008431316236965358, "clip_ratio/low_mean": 0.0008048149848036701, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016479466139571741, "epoch": 3.0932944606413995, "grad_norm": 0.13416419923305511, "learning_rate": 1e-06, "loss": 0.0443, "step": 298 }, { "clip_ratio/high_max": 0.0021894954043091275, "clip_ratio/high_mean": 0.0009245567780453712, "clip_ratio/low_mean": 0.0007333329012908507, "clip_ratio/low_min": 2.2944199372432195e-05, "clip_ratio/region_mean": 0.0016578897048020735, "epoch": 3.1026239067055394, "grad_norm": 0.1439695805311203, "learning_rate": 1e-06, "loss": -0.0147, "step": 299 }, { "clip_ratio/high_max": 0.0020652613311540335, "clip_ratio/high_mean": 0.0009515095807728358, "clip_ratio/low_mean": 0.0006689447982353158, "clip_ratio/low_min": 3.536067742970772e-05, "clip_ratio/region_mean": 0.001620454389922088, "epoch": 3.1119533527696794, "grad_norm": 0.11474190652370453, "learning_rate": 1e-06, "loss": -0.0145, "step": 300 }, { "clip_ratio/high_max": 0.0023675255579291843, "clip_ratio/high_mean": 0.0009787994258658728, "clip_ratio/low_mean": 0.0008052302928263089, "clip_ratio/low_min": 2.6561836421024054e-05, "clip_ratio/region_mean": 0.0017840296786744148, "epoch": 3.1212827988338194, "grad_norm": 0.13204632699489594, "learning_rate": 1e-06, "loss": -0.0396, "step": 301 }, { "clip_ratio/high_max": 0.002338104859518353, "clip_ratio/high_mean": 0.00087397830975533, "clip_ratio/low_mean": 0.0008851653401507065, "clip_ratio/low_min": 0.00011742644164769445, "clip_ratio/region_mean": 0.0017591436335351318, "epoch": 3.130612244897959, "grad_norm": 0.1468368023633957, "learning_rate": 1e-06, "loss": 0.0285, "step": 302 }, { "clip_ratio/high_max": 0.0021943984738754807, "clip_ratio/high_mean": 0.0008761826625232061, "clip_ratio/low_mean": 0.0009657405971665867, "clip_ratio/low_min": 0.00010738772016338771, "clip_ratio/region_mean": 0.0018419232583255507, "epoch": 3.139941690962099, "grad_norm": 0.14142325520515442, "learning_rate": 1e-06, "loss": 0.0468, "step": 303 }, { "clip_ratio/high_max": 0.001938758468895685, "clip_ratio/high_mean": 0.000763442124480207, "clip_ratio/low_mean": 0.0007295127775250876, "clip_ratio/low_min": 2.2486057787318714e-05, "clip_ratio/region_mean": 0.0014929548669897486, "epoch": 3.149271137026239, "grad_norm": 0.1148410513997078, "learning_rate": 1e-06, "loss": -0.0114, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.027064732142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 683.8236694335938, "completions/mean_terminated_length": 588.905029296875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 3.158600583090379, "grad_norm": 0.14269407093524933, "learning_rate": 1e-06, "loss": 0.0222, "num_tokens": 187986619.0, "reward": 0.577218234539032, "reward_std": 0.18078188598155975, "rewards/simpleverify_reward/mean": 0.5772181749343872, "rewards/simpleverify_reward/std": 0.4940185844898224, "step": 305 }, { "clip_ratio/high_max": 0.0017699811141937971, "clip_ratio/high_mean": 0.0006905598511366406, "clip_ratio/low_mean": 0.0004919505208818009, "clip_ratio/low_min": 4.929053466184996e-05, "clip_ratio/region_mean": 0.0011825103611045051, "epoch": 3.167930029154519, "grad_norm": 0.14001056551933289, "learning_rate": 1e-06, "loss": 0.0104, "step": 306 }, { "clip_ratio/high_max": 0.0017237623578694183, "clip_ratio/high_mean": 0.0006980688813200686, "clip_ratio/low_mean": 0.0005532092454814119, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012512781358964276, "epoch": 3.177259475218659, "grad_norm": 0.13213996589183807, "learning_rate": 1e-06, "loss": 0.0109, "step": 307 }, { "clip_ratio/high_max": 0.0020113985374337062, "clip_ratio/high_mean": 0.0007974783984536771, "clip_ratio/low_mean": 0.0005560520176004502, "clip_ratio/low_min": 1.270325174118625e-05, "clip_ratio/region_mean": 0.0013535304096876644, "epoch": 3.186588921282799, "grad_norm": 0.12336797267198563, "learning_rate": 1e-06, "loss": -0.0084, "step": 308 }, { "clip_ratio/high_max": 0.0020407320334925316, "clip_ratio/high_mean": 0.0008024697108339751, "clip_ratio/low_mean": 0.000546275142369268, "clip_ratio/low_min": 1.3403388038568664e-05, "clip_ratio/region_mean": 0.0013487448341038544, "epoch": 3.195918367346939, "grad_norm": 0.1353670060634613, "learning_rate": 1e-06, "loss": -0.0128, "step": 309 }, { "clip_ratio/high_max": 0.0017013272627082188, "clip_ratio/high_mean": 0.0006256370588744176, "clip_ratio/low_mean": 0.000592147046518221, "clip_ratio/low_min": 2.3379052436212078e-05, "clip_ratio/region_mean": 0.0012177840999356704, "epoch": 3.205247813411079, "grad_norm": 0.11926306784152985, "learning_rate": 1e-06, "loss": 0.0277, "step": 310 }, { "clip_ratio/high_max": 0.001658606830460485, "clip_ratio/high_mean": 0.0007854116192902438, "clip_ratio/low_mean": 0.0006315843620541273, "clip_ratio/low_min": 2.7882195354322903e-05, "clip_ratio/region_mean": 0.0014169959868013393, "epoch": 3.2145772594752184, "grad_norm": 0.13148078322410583, "learning_rate": 1e-06, "loss": -0.0104, "step": 311 }, { "clip_ratio/high_max": 0.002016498699958902, "clip_ratio/high_mean": 0.0008114361226034816, "clip_ratio/low_mean": 0.0006352941618388286, "clip_ratio/low_min": 1.3042570571997203e-05, "clip_ratio/region_mean": 0.0014467302898992784, "epoch": 3.2239067055393584, "grad_norm": 0.1220783144235611, "learning_rate": 1e-06, "loss": -0.0136, "step": 312 }, { "clip_ratio/high_max": 0.0019337129851919599, "clip_ratio/high_mean": 0.0007791861899022479, "clip_ratio/low_mean": 0.0006738671345374314, "clip_ratio/low_min": 2.866693466785364e-05, "clip_ratio/region_mean": 0.0014530533153447323, "epoch": 3.2332361516034984, "grad_norm": 0.1157480776309967, "learning_rate": 1e-06, "loss": -0.0285, "step": 313 }, { "clip_ratio/high_max": 0.0019657045777421445, "clip_ratio/high_mean": 0.000733011827833252, "clip_ratio/low_mean": 0.0007328845276788343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014658963773399591, "epoch": 3.2425655976676384, "grad_norm": 0.11224015802145004, "learning_rate": 1e-06, "loss": 0.0006, "step": 314 }, { "clip_ratio/high_max": 0.002447935701638926, "clip_ratio/high_mean": 0.0009402799005329143, "clip_ratio/low_mean": 0.0007534241103712702, "clip_ratio/low_min": 4.6636660044896416e-05, "clip_ratio/region_mean": 0.0016937039799813647, "epoch": 3.2518950437317784, "grad_norm": 0.12600477039813995, "learning_rate": 1e-06, "loss": 0.0025, "step": 315 }, { "clip_ratio/high_max": 0.0025127332628471777, "clip_ratio/high_mean": 0.0009153177379630506, "clip_ratio/low_mean": 0.0007386767138086725, "clip_ratio/low_min": 6.958124140510336e-05, "clip_ratio/region_mean": 0.0016539944699616171, "epoch": 3.2612244897959184, "grad_norm": 0.1282661110162735, "learning_rate": 1e-06, "loss": 0.0053, "step": 316 }, { "clip_ratio/high_max": 0.0020202266205160413, "clip_ratio/high_mean": 0.0008413398063567001, "clip_ratio/low_mean": 0.0008453142381767975, "clip_ratio/low_min": 1.1168692253704648e-05, "clip_ratio/region_mean": 0.0016866540754563175, "epoch": 3.2705539358600584, "grad_norm": 0.11926227807998657, "learning_rate": 1e-06, "loss": 0.0148, "step": 317 }, { "clip_ratio/high_max": 0.001693990456260508, "clip_ratio/high_mean": 0.0007604379097756464, "clip_ratio/low_mean": 0.0008189695781766204, "clip_ratio/low_min": 4.240460020810133e-05, "clip_ratio/region_mean": 0.0015794074643054046, "epoch": 3.2798833819241984, "grad_norm": 0.12501415610313416, "learning_rate": 1e-06, "loss": 0.0184, "step": 318 }, { "clip_ratio/high_max": 0.002175408859329764, "clip_ratio/high_mean": 0.0009051734177774051, "clip_ratio/low_mean": 0.0006495585330412723, "clip_ratio/low_min": 1.1891172107425518e-05, "clip_ratio/region_mean": 0.0015547319708275609, "epoch": 3.2892128279883384, "grad_norm": 0.12889187037944794, "learning_rate": 1e-06, "loss": -0.0372, "step": 319 }, { "clip_ratio/high_max": 0.00194399193424033, "clip_ratio/high_mean": 0.0008185194928955752, "clip_ratio/low_mean": 0.0009551559560350142, "clip_ratio/low_min": 2.746005975495791e-05, "clip_ratio/region_mean": 0.0017736754816723987, "epoch": 3.298542274052478, "grad_norm": 0.12713217735290527, "learning_rate": 1e-06, "loss": 0.0185, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0249720982142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 653.035400390625, "completions/mean_terminated_length": 564.8552856445312, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 3.307871720116618, "grad_norm": 0.15227718651294708, "learning_rate": 1e-06, "loss": -0.0897, "num_tokens": 197352134.0, "reward": 0.595284640789032, "reward_std": 0.1841016262769699, "rewards/simpleverify_reward/mean": 0.5952845811843872, "rewards/simpleverify_reward/std": 0.4908539950847626, "step": 321 }, { "clip_ratio/high_max": 0.0017166272446047515, "clip_ratio/high_mean": 0.0006630584775848547, "clip_ratio/low_mean": 0.0005869686165169696, "clip_ratio/low_min": 4.6280901187856216e-05, "clip_ratio/region_mean": 0.0012500270750024356, "epoch": 3.317201166180758, "grad_norm": 0.14169949293136597, "learning_rate": 1e-06, "loss": 0.0468, "step": 322 }, { "clip_ratio/high_max": 0.0016737426522013266, "clip_ratio/high_mean": 0.0007451594356098212, "clip_ratio/low_mean": 0.0004720169463325874, "clip_ratio/low_min": 1.1172684935445432e-05, "clip_ratio/region_mean": 0.0012171763773949351, "epoch": 3.326530612244898, "grad_norm": 0.11879831552505493, "learning_rate": 1e-06, "loss": -0.0216, "step": 323 }, { "clip_ratio/high_max": 0.0017261785615119152, "clip_ratio/high_mean": 0.0007574270257464377, "clip_ratio/low_mean": 0.00046042904887144687, "clip_ratio/low_min": 2.662121733010281e-05, "clip_ratio/region_mean": 0.001217856060975464, "epoch": 3.335860058309038, "grad_norm": 0.1339976191520691, "learning_rate": 1e-06, "loss": -0.0195, "step": 324 }, { "clip_ratio/high_max": 0.0021916381156188436, "clip_ratio/high_mean": 0.000832803381854319, "clip_ratio/low_mean": 0.0006794900127715664, "clip_ratio/low_min": 3.67465099770925e-05, "clip_ratio/region_mean": 0.0015122933909879066, "epoch": 3.345189504373178, "grad_norm": 0.13726191222667694, "learning_rate": 1e-06, "loss": 0.0448, "step": 325 }, { "clip_ratio/high_max": 0.0017336187229375355, "clip_ratio/high_mean": 0.0007337221486523049, "clip_ratio/low_mean": 0.0007074746317812242, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014411967567866668, "epoch": 3.354518950437318, "grad_norm": 0.12681844830513, "learning_rate": 1e-06, "loss": 0.008, "step": 326 }, { "clip_ratio/high_max": 0.0018904508688137867, "clip_ratio/high_mean": 0.0008136879841913469, "clip_ratio/low_mean": 0.0006346239488266292, "clip_ratio/low_min": 1.651691309234593e-05, "clip_ratio/region_mean": 0.001448311912099598, "epoch": 3.363848396501458, "grad_norm": 0.14459159970283508, "learning_rate": 1e-06, "loss": -0.0086, "step": 327 }, { "clip_ratio/high_max": 0.0025539593334542587, "clip_ratio/high_mean": 0.0009507969698461238, "clip_ratio/low_mean": 0.0006640958763455274, "clip_ratio/low_min": 1.1884389095939696e-05, "clip_ratio/region_mean": 0.0016148928625625558, "epoch": 3.373177842565598, "grad_norm": 0.129063218832016, "learning_rate": 1e-06, "loss": -0.0108, "step": 328 }, { "clip_ratio/high_max": 0.002146779836039059, "clip_ratio/high_mean": 0.0008973109470389318, "clip_ratio/low_mean": 0.0006553078001161339, "clip_ratio/low_min": 8.441383215540554e-06, "clip_ratio/region_mean": 0.001552618748974055, "epoch": 3.3825072886297374, "grad_norm": 0.12080083042383194, "learning_rate": 1e-06, "loss": -0.0315, "step": 329 }, { "clip_ratio/high_max": 0.001751711930410238, "clip_ratio/high_mean": 0.0007533698180850479, "clip_ratio/low_mean": 0.000754638966100174, "clip_ratio/low_min": 2.6355553927714936e-05, "clip_ratio/region_mean": 0.001508008786913706, "epoch": 3.3918367346938774, "grad_norm": 0.13202187418937683, "learning_rate": 1e-06, "loss": 0.0034, "step": 330 }, { "clip_ratio/high_max": 0.0020411277073435485, "clip_ratio/high_mean": 0.0008222823362302734, "clip_ratio/low_mean": 0.0007693566258240025, "clip_ratio/low_min": 4.166630969848484e-05, "clip_ratio/region_mean": 0.0015916389747872017, "epoch": 3.4011661807580174, "grad_norm": 0.12989278137683868, "learning_rate": 1e-06, "loss": 0.0123, "step": 331 }, { "clip_ratio/high_max": 0.001821988364099525, "clip_ratio/high_mean": 0.0008121232622215757, "clip_ratio/low_mean": 0.0008057210452534491, "clip_ratio/low_min": 7.15208279871149e-05, "clip_ratio/region_mean": 0.001617844303837046, "epoch": 3.4104956268221573, "grad_norm": 0.14076241850852966, "learning_rate": 1e-06, "loss": 0.0117, "step": 332 }, { "clip_ratio/high_max": 0.0019886377922375686, "clip_ratio/high_mean": 0.0008466567378491163, "clip_ratio/low_mean": 0.0009212721070070984, "clip_ratio/low_min": 8.529675324098207e-05, "clip_ratio/region_mean": 0.0017679288357612677, "epoch": 3.4198250728862973, "grad_norm": 0.13854318857192993, "learning_rate": 1e-06, "loss": 0.0221, "step": 333 }, { "clip_ratio/high_max": 0.0018617175956023857, "clip_ratio/high_mean": 0.0008360159408766776, "clip_ratio/low_mean": 0.0009085234414669685, "clip_ratio/low_min": 4.162658842687961e-05, "clip_ratio/region_mean": 0.001744539404171519, "epoch": 3.4291545189504373, "grad_norm": 0.12630914151668549, "learning_rate": 1e-06, "loss": 0.0266, "step": 334 }, { "clip_ratio/high_max": 0.002005110389291076, "clip_ratio/high_mean": 0.000755791263145511, "clip_ratio/low_mean": 0.0009587220738467295, "clip_ratio/low_min": 5.2178563237248454e-05, "clip_ratio/region_mean": 0.0017145132951554842, "epoch": 3.4384839650145773, "grad_norm": 0.13822253048419952, "learning_rate": 1e-06, "loss": 0.0027, "step": 335 }, { "clip_ratio/high_max": 0.0020469805713219102, "clip_ratio/high_mean": 0.0008232913387473673, "clip_ratio/low_mean": 0.0009008159122458892, "clip_ratio/low_min": 8.091965719358996e-05, "clip_ratio/region_mean": 0.0017241072346223518, "epoch": 3.4478134110787173, "grad_norm": 0.13763093948364258, "learning_rate": 1e-06, "loss": 0.038, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028599330357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3885.0, "completions/mean_length": 658.8094482421875, "completions/mean_terminated_length": 557.6139526367188, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 3.4571428571428573, "grad_norm": 0.12403003126382828, "learning_rate": 1e-06, "loss": -0.0189, "num_tokens": 206570610.0, "reward": 0.60498046875, "reward_std": 0.18488143384456635, "rewards/simpleverify_reward/mean": 0.60498046875, "rewards/simpleverify_reward/std": 0.4888719320297241, "step": 337 }, { "clip_ratio/high_max": 0.001972136626136489, "clip_ratio/high_mean": 0.000792622180597391, "clip_ratio/low_mean": 0.0005690870639227796, "clip_ratio/low_min": 1.2204647646285594e-05, "clip_ratio/region_mean": 0.0013617092263302766, "epoch": 3.466472303206997, "grad_norm": 0.13247369229793549, "learning_rate": 1e-06, "loss": 0.013, "step": 338 }, { "clip_ratio/high_max": 0.002160618645575596, "clip_ratio/high_mean": 0.0007373291009571403, "clip_ratio/low_mean": 0.0005553062568424139, "clip_ratio/low_min": 1.6951451470959e-05, "clip_ratio/region_mean": 0.0012926353265356738, "epoch": 3.4758017492711373, "grad_norm": 0.15079790353775024, "learning_rate": 1e-06, "loss": 0.015, "step": 339 }, { "clip_ratio/high_max": 0.001785652249964187, "clip_ratio/high_mean": 0.0007135673604352633, "clip_ratio/low_mean": 0.0006396340177161619, "clip_ratio/low_min": 7.717369044257794e-05, "clip_ratio/region_mean": 0.0013532014017982874, "epoch": 3.485131195335277, "grad_norm": 0.1295570731163025, "learning_rate": 1e-06, "loss": -0.0125, "step": 340 }, { "clip_ratio/high_max": 0.0020896260321023874, "clip_ratio/high_mean": 0.0009250791117665358, "clip_ratio/low_mean": 0.0006579237187906983, "clip_ratio/low_min": 3.5063396353507414e-05, "clip_ratio/region_mean": 0.001583002846018644, "epoch": 3.494460641399417, "grad_norm": 0.14507369697093964, "learning_rate": 1e-06, "loss": -0.0215, "step": 341 }, { "clip_ratio/high_max": 0.0017310734983766451, "clip_ratio/high_mean": 0.0008441622121608816, "clip_ratio/low_mean": 0.0007659088032596628, "clip_ratio/low_min": 8.721328777028248e-05, "clip_ratio/region_mean": 0.0016100710272439755, "epoch": 3.503790087463557, "grad_norm": 0.1391536444425583, "learning_rate": 1e-06, "loss": -0.0091, "step": 342 }, { "clip_ratio/high_max": 0.001988524803891778, "clip_ratio/high_mean": 0.0008353917473868933, "clip_ratio/low_mean": 0.0007471626722690416, "clip_ratio/low_min": 4.443542638910003e-05, "clip_ratio/region_mean": 0.0015825543814571574, "epoch": 3.513119533527697, "grad_norm": 0.15481480956077576, "learning_rate": 1e-06, "loss": 0.0209, "step": 343 }, { "clip_ratio/high_max": 0.002258638050989248, "clip_ratio/high_mean": 0.0008268516121461289, "clip_ratio/low_mean": 0.0007452916815964272, "clip_ratio/low_min": 1.5173585779848509e-05, "clip_ratio/region_mean": 0.0015721432901045773, "epoch": 3.522448979591837, "grad_norm": 0.1509442925453186, "learning_rate": 1e-06, "loss": 0.0282, "step": 344 }, { "clip_ratio/high_max": 0.0017970880107895937, "clip_ratio/high_mean": 0.0007754464650133741, "clip_ratio/low_mean": 0.000627942594292108, "clip_ratio/low_min": 5.9505990975594614e-05, "clip_ratio/region_mean": 0.001403389029292157, "epoch": 3.5317784256559768, "grad_norm": 0.11863583326339722, "learning_rate": 1e-06, "loss": -0.0044, "step": 345 }, { "clip_ratio/high_max": 0.0020055183849763125, "clip_ratio/high_mean": 0.0008519104776496533, "clip_ratio/low_mean": 0.0007014453467490966, "clip_ratio/low_min": 8.000228535820497e-05, "clip_ratio/region_mean": 0.00155335583986016, "epoch": 3.5411078717201168, "grad_norm": 0.1341390758752823, "learning_rate": 1e-06, "loss": -0.0011, "step": 346 }, { "clip_ratio/high_max": 0.0022656471337541007, "clip_ratio/high_mean": 0.0009472022338741226, "clip_ratio/low_mean": 0.0008167266751115676, "clip_ratio/low_min": 3.487723370199092e-05, "clip_ratio/region_mean": 0.0017639289362705313, "epoch": 3.5504373177842563, "grad_norm": 0.13417421281337738, "learning_rate": 1e-06, "loss": 0.0064, "step": 347 }, { "clip_ratio/high_max": 0.0023399665296892636, "clip_ratio/high_mean": 0.0009603422422514996, "clip_ratio/low_mean": 0.0007713568847975694, "clip_ratio/low_min": 3.5444234526949e-05, "clip_ratio/region_mean": 0.0017316991288680583, "epoch": 3.5597667638483967, "grad_norm": 0.15852801501750946, "learning_rate": 1e-06, "loss": -0.0153, "step": 348 }, { "clip_ratio/high_max": 0.0019903949105355423, "clip_ratio/high_mean": 0.0008738572450965876, "clip_ratio/low_mean": 0.0007748459520371398, "clip_ratio/low_min": 1.5679879652452655e-05, "clip_ratio/region_mean": 0.001648703204409685, "epoch": 3.5690962099125363, "grad_norm": 0.12920647859573364, "learning_rate": 1e-06, "loss": -0.0362, "step": 349 }, { "clip_ratio/high_max": 0.002073092677164823, "clip_ratio/high_mean": 0.0008930051244533388, "clip_ratio/low_mean": 0.0008246085617429344, "clip_ratio/low_min": 1.6456029698019847e-05, "clip_ratio/region_mean": 0.0017176136170746759, "epoch": 3.5784256559766763, "grad_norm": 0.14616502821445465, "learning_rate": 1e-06, "loss": -0.0299, "step": 350 }, { "clip_ratio/high_max": 0.0019175592060491908, "clip_ratio/high_mean": 0.0008384221891901689, "clip_ratio/low_mean": 0.0008720893074496416, "clip_ratio/low_min": 0.0001230034467880614, "clip_ratio/region_mean": 0.001710511525743641, "epoch": 3.5877551020408163, "grad_norm": 0.12375102937221527, "learning_rate": 1e-06, "loss": 0.0051, "step": 351 }, { "clip_ratio/high_max": 0.002297624363563955, "clip_ratio/high_mean": 0.0008397939691349166, "clip_ratio/low_mean": 0.0009426305387023604, "clip_ratio/low_min": 8.49585530886543e-05, "clip_ratio/region_mean": 0.0017824244932853617, "epoch": 3.5970845481049563, "grad_norm": 0.13397181034088135, "learning_rate": 1e-06, "loss": 0.0265, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0248325892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 647.344482421875, "completions/mean_terminated_length": 559.524658203125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 3.6064139941690962, "grad_norm": 0.13809792697429657, "learning_rate": 1e-06, "loss": -0.016, "num_tokens": 215790548.0, "reward": 0.611328125, "reward_std": 0.18358643352985382, "rewards/simpleverify_reward/mean": 0.611328125, "rewards/simpleverify_reward/std": 0.487465500831604, "step": 353 }, { "clip_ratio/high_max": 0.0018932658131234348, "clip_ratio/high_mean": 0.0007701791182626039, "clip_ratio/low_mean": 0.0006212290973053314, "clip_ratio/low_min": 9.490573211223818e-05, "clip_ratio/region_mean": 0.0013914082155679353, "epoch": 3.6157434402332362, "grad_norm": 0.14309000968933105, "learning_rate": 1e-06, "loss": 0.0127, "step": 354 }, { "clip_ratio/high_max": 0.0018599841969262343, "clip_ratio/high_mean": 0.0007546511569671566, "clip_ratio/low_mean": 0.000595229761529481, "clip_ratio/low_min": 4.0200537114287727e-05, "clip_ratio/region_mean": 0.0013498809166776482, "epoch": 3.6250728862973762, "grad_norm": 0.14259673655033112, "learning_rate": 1e-06, "loss": 0.005, "step": 355 }, { "clip_ratio/high_max": 0.0019480742776067927, "clip_ratio/high_mean": 0.0008742873105802573, "clip_ratio/low_mean": 0.0006196865642777993, "clip_ratio/low_min": 1.5489467841689475e-05, "clip_ratio/region_mean": 0.0014939739048713818, "epoch": 3.6344023323615158, "grad_norm": 0.12907080352306366, "learning_rate": 1e-06, "loss": -0.0185, "step": 356 }, { "clip_ratio/high_max": 0.002096452357363887, "clip_ratio/high_mean": 0.0008745319282752462, "clip_ratio/low_mean": 0.0006297699928836664, "clip_ratio/low_min": 4.229914156894665e-05, "clip_ratio/region_mean": 0.00150430195935769, "epoch": 3.643731778425656, "grad_norm": 0.14012117683887482, "learning_rate": 1e-06, "loss": -0.0473, "step": 357 }, { "clip_ratio/high_max": 0.001913920117658563, "clip_ratio/high_mean": 0.0006733141744916793, "clip_ratio/low_mean": 0.0007174781439971412, "clip_ratio/low_min": 5.916573809372494e-05, "clip_ratio/region_mean": 0.0013907923457736615, "epoch": 3.6530612244897958, "grad_norm": 0.13917294144630432, "learning_rate": 1e-06, "loss": 0.0318, "step": 358 }, { "clip_ratio/high_max": 0.002038639551756205, "clip_ratio/high_mean": 0.0009362640739709605, "clip_ratio/low_mean": 0.0006754394698873512, "clip_ratio/low_min": 2.6754089049063623e-05, "clip_ratio/region_mean": 0.0016117035374918487, "epoch": 3.6623906705539357, "grad_norm": 0.13587848842144012, "learning_rate": 1e-06, "loss": -0.0147, "step": 359 }, { "clip_ratio/high_max": 0.0017512241247459315, "clip_ratio/high_mean": 0.0007350286159635289, "clip_ratio/low_mean": 0.0007715390984230908, "clip_ratio/low_min": 7.827021818229696e-05, "clip_ratio/region_mean": 0.001506567707110662, "epoch": 3.6717201166180757, "grad_norm": 0.12973980605602264, "learning_rate": 1e-06, "loss": 0.0083, "step": 360 }, { "clip_ratio/high_max": 0.00213849472856964, "clip_ratio/high_mean": 0.0008418706438533263, "clip_ratio/low_mean": 0.0007072877888276707, "clip_ratio/low_min": 5.299445092532551e-05, "clip_ratio/region_mean": 0.0015491584490519017, "epoch": 3.6810495626822157, "grad_norm": 0.13317175209522247, "learning_rate": 1e-06, "loss": 0.0034, "step": 361 }, { "clip_ratio/high_max": 0.0018911914230557159, "clip_ratio/high_mean": 0.0007951952575240284, "clip_ratio/low_mean": 0.0007770923220959958, "clip_ratio/low_min": 3.791793915297603e-05, "clip_ratio/region_mean": 0.001572287583258003, "epoch": 3.6903790087463557, "grad_norm": 0.13152183592319489, "learning_rate": 1e-06, "loss": -0.0251, "step": 362 }, { "clip_ratio/high_max": 0.0019327114532643463, "clip_ratio/high_mean": 0.0008031058105189004, "clip_ratio/low_mean": 0.0008896129838831257, "clip_ratio/low_min": 7.073042797856033e-05, "clip_ratio/region_mean": 0.0016927188262343407, "epoch": 3.6997084548104957, "grad_norm": 0.13154615461826324, "learning_rate": 1e-06, "loss": 0.0209, "step": 363 }, { "clip_ratio/high_max": 0.00190185406972887, "clip_ratio/high_mean": 0.0007574819301225943, "clip_ratio/low_mean": 0.00089092393318424, "clip_ratio/low_min": 8.049170173762832e-05, "clip_ratio/region_mean": 0.0016484058505739085, "epoch": 3.7090379008746357, "grad_norm": 0.20127904415130615, "learning_rate": 1e-06, "loss": 0.0299, "step": 364 }, { "clip_ratio/high_max": 0.0019361752274562605, "clip_ratio/high_mean": 0.0008282441267510876, "clip_ratio/low_mean": 0.0010528558068472194, "clip_ratio/low_min": 7.073859887896106e-05, "clip_ratio/region_mean": 0.0018810999536071904, "epoch": 3.7183673469387752, "grad_norm": 0.13065855205059052, "learning_rate": 1e-06, "loss": -0.0043, "step": 365 }, { "clip_ratio/high_max": 0.0019397070464037824, "clip_ratio/high_mean": 0.0007790359304635786, "clip_ratio/low_mean": 0.0008844799813232385, "clip_ratio/low_min": 5.07232325617224e-05, "clip_ratio/region_mean": 0.0016635158826829866, "epoch": 3.7276967930029157, "grad_norm": 0.12333003431558609, "learning_rate": 1e-06, "loss": -0.0134, "step": 366 }, { "clip_ratio/high_max": 0.0020293179150030483, "clip_ratio/high_mean": 0.0009107535333896521, "clip_ratio/low_mean": 0.0008826762459648307, "clip_ratio/low_min": 6.054913865227718e-05, "clip_ratio/region_mean": 0.0017934297939063981, "epoch": 3.7370262390670552, "grad_norm": 0.12548886239528656, "learning_rate": 1e-06, "loss": -0.0391, "step": 367 }, { "clip_ratio/high_max": 0.0022486291964014526, "clip_ratio/high_mean": 0.0007547445438831346, "clip_ratio/low_mean": 0.0010447937493154313, "clip_ratio/low_min": 4.6772252972004935e-05, "clip_ratio/region_mean": 0.0017995382950175554, "epoch": 3.746355685131195, "grad_norm": 0.13694019615650177, "learning_rate": 1e-06, "loss": 0.0177, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025599888392857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 656.8368530273438, "completions/mean_terminated_length": 566.4815673828125, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 3.755685131195335, "grad_norm": 0.14345668256282806, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 225091937.0, "reward": 0.613839328289032, "reward_std": 0.1757359802722931, "rewards/simpleverify_reward/mean": 0.6138392686843872, "rewards/simpleverify_reward/std": 0.4868851602077484, "step": 369 }, { "clip_ratio/high_max": 0.0018400437584205065, "clip_ratio/high_mean": 0.0007343808929363149, "clip_ratio/low_mean": 0.0005707125637854915, "clip_ratio/low_min": 3.477920745353913e-05, "clip_ratio/region_mean": 0.001305093443079386, "epoch": 3.765014577259475, "grad_norm": 0.14134864509105682, "learning_rate": 1e-06, "loss": 0.0396, "step": 370 }, { "clip_ratio/high_max": 0.001933447681949474, "clip_ratio/high_mean": 0.0007380321471828211, "clip_ratio/low_mean": 0.0005362306173992692, "clip_ratio/low_min": 3.784755517699523e-05, "clip_ratio/region_mean": 0.0012742627441184595, "epoch": 3.774344023323615, "grad_norm": 0.1336478441953659, "learning_rate": 1e-06, "loss": -0.0357, "step": 371 }, { "clip_ratio/high_max": 0.002023385310167214, "clip_ratio/high_mean": 0.0007725142022536602, "clip_ratio/low_mean": 0.0005365621627788641, "clip_ratio/low_min": 1.0224112884316128e-05, "clip_ratio/region_mean": 0.0013090763459331356, "epoch": 3.783673469387755, "grad_norm": 0.12566912174224854, "learning_rate": 1e-06, "loss": -0.0106, "step": 372 }, { "clip_ratio/high_max": 0.0016083677946880925, "clip_ratio/high_mean": 0.000686061169290042, "clip_ratio/low_mean": 0.00070582997159363, "clip_ratio/low_min": 6.713408220093697e-05, "clip_ratio/region_mean": 0.0013918911281507462, "epoch": 3.793002915451895, "grad_norm": 0.12212657183408737, "learning_rate": 1e-06, "loss": 0.0567, "step": 373 }, { "clip_ratio/high_max": 0.0016963184607448056, "clip_ratio/high_mean": 0.0006963937703403644, "clip_ratio/low_mean": 0.000531734511241666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012281282943149563, "epoch": 3.8023323615160347, "grad_norm": 0.12506172060966492, "learning_rate": 1e-06, "loss": -0.0339, "step": 374 }, { "clip_ratio/high_max": 0.001918855316034751, "clip_ratio/high_mean": 0.0008241226842073957, "clip_ratio/low_mean": 0.0006836469065092388, "clip_ratio/low_min": 6.271101392485434e-05, "clip_ratio/region_mean": 0.001507769564341288, "epoch": 3.811661807580175, "grad_norm": 0.11953527480363846, "learning_rate": 1e-06, "loss": -0.0095, "step": 375 }, { "clip_ratio/high_max": 0.0021414353395812213, "clip_ratio/high_mean": 0.0007881643068685662, "clip_ratio/low_mean": 0.0007476024729839992, "clip_ratio/low_min": 6.328299241431523e-05, "clip_ratio/region_mean": 0.0015357667762145866, "epoch": 3.8209912536443147, "grad_norm": 0.15017814934253693, "learning_rate": 1e-06, "loss": 0.0238, "step": 376 }, { "clip_ratio/high_max": 0.0016525070604984649, "clip_ratio/high_mean": 0.0007044180511002196, "clip_ratio/low_mean": 0.0007751957145956112, "clip_ratio/low_min": 2.3611276446899865e-05, "clip_ratio/region_mean": 0.0014796137693338096, "epoch": 3.8303206997084547, "grad_norm": 0.14239539206027985, "learning_rate": 1e-06, "loss": 0.017, "step": 377 }, { "clip_ratio/high_max": 0.0017872976313810796, "clip_ratio/high_mean": 0.0007390251166725648, "clip_ratio/low_mean": 0.0006384620755852666, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013774871804344002, "epoch": 3.8396501457725947, "grad_norm": 0.12178703397512436, "learning_rate": 1e-06, "loss": -0.0327, "step": 378 }, { "clip_ratio/high_max": 0.001903229371237103, "clip_ratio/high_mean": 0.0008364490913663758, "clip_ratio/low_mean": 0.0007336869120990741, "clip_ratio/low_min": 6.0237469369894825e-05, "clip_ratio/region_mean": 0.0015701359952799976, "epoch": 3.8489795918367347, "grad_norm": 0.13856664299964905, "learning_rate": 1e-06, "loss": -0.0187, "step": 379 }, { "clip_ratio/high_max": 0.0019037699821637943, "clip_ratio/high_mean": 0.0008285402072942816, "clip_ratio/low_mean": 0.0008702890245331218, "clip_ratio/low_min": 3.083374394918792e-05, "clip_ratio/region_mean": 0.0016988292372843716, "epoch": 3.8583090379008746, "grad_norm": 0.1500893533229828, "learning_rate": 1e-06, "loss": -0.0013, "step": 380 }, { "clip_ratio/high_max": 0.001953663319000043, "clip_ratio/high_mean": 0.0008534812186553609, "clip_ratio/low_mean": 0.0008310416437780077, "clip_ratio/low_min": 4.4442067519412376e-05, "clip_ratio/region_mean": 0.001684522856521653, "epoch": 3.8676384839650146, "grad_norm": 0.12926261126995087, "learning_rate": 1e-06, "loss": 0.0214, "step": 381 }, { "clip_ratio/high_max": 0.0019278826948720962, "clip_ratio/high_mean": 0.0008015198163775494, "clip_ratio/low_mean": 0.0007348083720444265, "clip_ratio/low_min": 9.344306727143703e-05, "clip_ratio/region_mean": 0.001536328158181277, "epoch": 3.8769679300291546, "grad_norm": 0.12293867766857147, "learning_rate": 1e-06, "loss": -0.0161, "step": 382 }, { "clip_ratio/high_max": 0.0021920848739682697, "clip_ratio/high_mean": 0.000945954929193249, "clip_ratio/low_mean": 0.0007181500204751501, "clip_ratio/low_min": 4.2185941310890485e-05, "clip_ratio/region_mean": 0.001664104940573452, "epoch": 3.8862973760932946, "grad_norm": 0.12077221274375916, "learning_rate": 1e-06, "loss": -0.035, "step": 383 }, { "clip_ratio/high_max": 0.0019135183501930442, "clip_ratio/high_mean": 0.0007878634432927356, "clip_ratio/low_mean": 0.0008921718908823095, "clip_ratio/low_min": 4.851229095947929e-05, "clip_ratio/region_mean": 0.0016800353332655504, "epoch": 3.8956268221574346, "grad_norm": 0.12633126974105835, "learning_rate": 1e-06, "loss": 0.0313, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02783203125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 671.3526611328125, "completions/mean_terminated_length": 573.3089599609375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 4.0093294460641395, "grad_norm": 0.1529153287410736, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 234497616.0, "reward": 0.6082589626312256, "reward_std": 0.17632515728473663, "rewards/simpleverify_reward/mean": 0.6082589030265808, "rewards/simpleverify_reward/std": 0.48815637826919556, "step": 385 }, { "clip_ratio/high_max": 0.0019577243510866538, "clip_ratio/high_mean": 0.0007252702598634642, "clip_ratio/low_mean": 0.0005376849376261816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012629551856662147, "epoch": 4.01865889212828, "grad_norm": 0.13885675370693207, "learning_rate": 1e-06, "loss": 0.0038, "step": 386 }, { "clip_ratio/high_max": 0.0017147296930488665, "clip_ratio/high_mean": 0.000674854616590892, "clip_ratio/low_mean": 0.0005597780473181047, "clip_ratio/low_min": 3.375641244929284e-05, "clip_ratio/region_mean": 0.0012346326693659648, "epoch": 4.0279883381924195, "grad_norm": 0.23313739895820618, "learning_rate": 1e-06, "loss": -0.004, "step": 387 }, { "clip_ratio/high_max": 0.0017099013530241791, "clip_ratio/high_mean": 0.0006581401121366071, "clip_ratio/low_mean": 0.0005462319159050821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001204372038046131, "epoch": 4.03731778425656, "grad_norm": 0.13231658935546875, "learning_rate": 1e-06, "loss": -0.0225, "step": 388 }, { "clip_ratio/high_max": 0.0017164169839816168, "clip_ratio/high_mean": 0.0006906441976752831, "clip_ratio/low_mean": 0.0005461070904857479, "clip_ratio/low_min": 2.5730752895469777e-05, "clip_ratio/region_mean": 0.0012367513045319356, "epoch": 4.0466472303206995, "grad_norm": 0.13113944232463837, "learning_rate": 1e-06, "loss": 0.0059, "step": 389 }, { "clip_ratio/high_max": 0.0018892693988163956, "clip_ratio/high_mean": 0.000764418400649447, "clip_ratio/low_mean": 0.0007307615014724433, "clip_ratio/low_min": 3.8069822039688006e-05, "clip_ratio/region_mean": 0.0014951798802940175, "epoch": 4.05597667638484, "grad_norm": 0.13379260897636414, "learning_rate": 1e-06, "loss": 0.0094, "step": 390 }, { "clip_ratio/high_max": 0.0018383670067123603, "clip_ratio/high_mean": 0.0007633827835888951, "clip_ratio/low_mean": 0.0005453815265354933, "clip_ratio/low_min": 2.230018981208559e-05, "clip_ratio/region_mean": 0.0013087643128528725, "epoch": 4.0653061224489795, "grad_norm": 0.12431970238685608, "learning_rate": 1e-06, "loss": -0.0052, "step": 391 }, { "clip_ratio/high_max": 0.0019754939057747833, "clip_ratio/high_mean": 0.0007750319837214192, "clip_ratio/low_mean": 0.0006550831321874284, "clip_ratio/low_min": 1.4098804058448877e-05, "clip_ratio/region_mean": 0.0014301151168183424, "epoch": 4.07463556851312, "grad_norm": 0.14189937710762024, "learning_rate": 1e-06, "loss": 0.0316, "step": 392 }, { "clip_ratio/high_max": 0.002105160845530918, "clip_ratio/high_mean": 0.0007981947428561398, "clip_ratio/low_mean": 0.0005837283351866063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001381923102599103, "epoch": 4.0839650145772595, "grad_norm": 0.1334790140390396, "learning_rate": 1e-06, "loss": -0.007, "step": 393 }, { "clip_ratio/high_max": 0.002172323496779427, "clip_ratio/high_mean": 0.0009016791991598438, "clip_ratio/low_mean": 0.000730549281797721, "clip_ratio/low_min": 5.245133888820419e-05, "clip_ratio/region_mean": 0.0016322284791385755, "epoch": 4.093294460641399, "grad_norm": 0.13940821588039398, "learning_rate": 1e-06, "loss": -0.0803, "step": 394 }, { "clip_ratio/high_max": 0.0017852749188023154, "clip_ratio/high_mean": 0.0007002063721301965, "clip_ratio/low_mean": 0.0006869080316391774, "clip_ratio/low_min": 3.845369519694941e-05, "clip_ratio/region_mean": 0.001387114400131395, "epoch": 4.1026239067055394, "grad_norm": 0.14005617797374725, "learning_rate": 1e-06, "loss": 0.0015, "step": 395 }, { "clip_ratio/high_max": 0.0017021473904605955, "clip_ratio/high_mean": 0.0007668339130759705, "clip_ratio/low_mean": 0.0008035838282012264, "clip_ratio/low_min": 0.00011676601116050733, "clip_ratio/region_mean": 0.0015704177567386068, "epoch": 4.111953352769679, "grad_norm": 0.14277762174606323, "learning_rate": 1e-06, "loss": 0.0197, "step": 396 }, { "clip_ratio/high_max": 0.0018311490493942983, "clip_ratio/high_mean": 0.0007805465429555625, "clip_ratio/low_mean": 0.0008844130679790396, "clip_ratio/low_min": 3.1898929591989145e-05, "clip_ratio/region_mean": 0.0016649595636408776, "epoch": 4.121282798833819, "grad_norm": 0.135515958070755, "learning_rate": 1e-06, "loss": 0.0083, "step": 397 }, { "clip_ratio/high_max": 0.0019733717672352213, "clip_ratio/high_mean": 0.0007612671815877547, "clip_ratio/low_mean": 0.000839572296172264, "clip_ratio/low_min": 7.499046205339255e-05, "clip_ratio/region_mean": 0.0016008394813979976, "epoch": 4.130612244897959, "grad_norm": 0.13475604355335236, "learning_rate": 1e-06, "loss": 0.0273, "step": 398 }, { "clip_ratio/high_max": 0.00188804973004153, "clip_ratio/high_mean": 0.0007694981886743335, "clip_ratio/low_mean": 0.0009495820486336015, "clip_ratio/low_min": 5.27039392181905e-05, "clip_ratio/region_mean": 0.0017190802027471364, "epoch": 4.139941690962099, "grad_norm": 0.11878134310245514, "learning_rate": 1e-06, "loss": 0.0124, "step": 399 }, { "clip_ratio/high_max": 0.0022606485545111354, "clip_ratio/high_mean": 0.000956731115365983, "clip_ratio/low_mean": 0.0007340354604821187, "clip_ratio/low_min": 3.009147803822998e-05, "clip_ratio/region_mean": 0.0016907665922190063, "epoch": 4.149271137026239, "grad_norm": 0.12470757216215134, "learning_rate": 1e-06, "loss": -0.0307, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028529575892857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 672.625732421875, "completions/mean_terminated_length": 572.0900268554688, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 4.158600583090379, "grad_norm": 0.15526865422725677, "learning_rate": 1e-06, "loss": 0.0206, "num_tokens": 243905370.0, "reward": 0.6121652126312256, "reward_std": 0.1780315339565277, "rewards/simpleverify_reward/mean": 0.6121651530265808, "rewards/simpleverify_reward/std": 0.48727357387542725, "step": 401 }, { "clip_ratio/high_max": 0.0016996623999148142, "clip_ratio/high_mean": 0.0007169887339841807, "clip_ratio/low_mean": 0.0005328461011231411, "clip_ratio/low_min": 2.2178850485943258e-05, "clip_ratio/region_mean": 0.0012498348514782265, "epoch": 4.167930029154519, "grad_norm": 0.1462271362543106, "learning_rate": 1e-06, "loss": -0.0315, "step": 402 }, { "clip_ratio/high_max": 0.0015384060498035979, "clip_ratio/high_mean": 0.0006313552903520758, "clip_ratio/low_mean": 0.0006047548313290463, "clip_ratio/low_min": 2.4326724087586626e-05, "clip_ratio/region_mean": 0.0012361101362330373, "epoch": 4.1772594752186585, "grad_norm": 0.11831765621900558, "learning_rate": 1e-06, "loss": 0.0015, "step": 403 }, { "clip_ratio/high_max": 0.002229679668744211, "clip_ratio/high_mean": 0.0007981007156558917, "clip_ratio/low_mean": 0.0005675280376635783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013656287601406802, "epoch": 4.186588921282799, "grad_norm": 0.12994642555713654, "learning_rate": 1e-06, "loss": -0.0312, "step": 404 }, { "clip_ratio/high_max": 0.001905001510749571, "clip_ratio/high_mean": 0.0007830419381207321, "clip_ratio/low_mean": 0.000646907972623012, "clip_ratio/low_min": 1.7715419744490646e-05, "clip_ratio/region_mean": 0.0014299499162007123, "epoch": 4.1959183673469385, "grad_norm": 0.13852684199810028, "learning_rate": 1e-06, "loss": -0.0147, "step": 405 }, { "clip_ratio/high_max": 0.00155239269224694, "clip_ratio/high_mean": 0.000716466007361305, "clip_ratio/low_mean": 0.0006957045934541384, "clip_ratio/low_min": 2.95541831292212e-05, "clip_ratio/region_mean": 0.00141217054988374, "epoch": 4.205247813411079, "grad_norm": 0.14223319292068481, "learning_rate": 1e-06, "loss": -0.0214, "step": 406 }, { "clip_ratio/high_max": 0.0020819639503315557, "clip_ratio/high_mean": 0.0007675492270209361, "clip_ratio/low_mean": 0.0006998460903560044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014673953264718875, "epoch": 4.214577259475218, "grad_norm": 0.1483393907546997, "learning_rate": 1e-06, "loss": 0.0071, "step": 407 }, { "clip_ratio/high_max": 0.001884318291558884, "clip_ratio/high_mean": 0.000763836489568348, "clip_ratio/low_mean": 0.0006492118282039883, "clip_ratio/low_min": 9.474003491050098e-06, "clip_ratio/region_mean": 0.0014130483286862727, "epoch": 4.223906705539359, "grad_norm": 0.13364793360233307, "learning_rate": 1e-06, "loss": -0.0162, "step": 408 }, { "clip_ratio/high_max": 0.002078667541354662, "clip_ratio/high_mean": 0.0007714645926171215, "clip_ratio/low_mean": 0.0009063264315045672, "clip_ratio/low_min": 4.177046139375307e-05, "clip_ratio/region_mean": 0.0016777910059317946, "epoch": 4.233236151603498, "grad_norm": 0.15086346864700317, "learning_rate": 1e-06, "loss": 0.0223, "step": 409 }, { "clip_ratio/high_max": 0.002239151086541824, "clip_ratio/high_mean": 0.0008801519306871342, "clip_ratio/low_mean": 0.0007311477202165406, "clip_ratio/low_min": 2.8453273444029037e-05, "clip_ratio/region_mean": 0.0016112996236188337, "epoch": 4.242565597667639, "grad_norm": 0.14248883724212646, "learning_rate": 1e-06, "loss": -0.0019, "step": 410 }, { "clip_ratio/high_max": 0.0018246435138280503, "clip_ratio/high_mean": 0.0007053892222756986, "clip_ratio/low_mean": 0.0006835853364464128, "clip_ratio/low_min": 3.619405561039457e-05, "clip_ratio/region_mean": 0.0013889745605411008, "epoch": 4.251895043731778, "grad_norm": 0.11880943179130554, "learning_rate": 1e-06, "loss": -0.0072, "step": 411 }, { "clip_ratio/high_max": 0.0017824490350903943, "clip_ratio/high_mean": 0.0007422523085551802, "clip_ratio/low_mean": 0.0008661887550260872, "clip_ratio/low_min": 1.9278222680441104e-05, "clip_ratio/region_mean": 0.0016084410599432886, "epoch": 4.261224489795918, "grad_norm": 0.13260233402252197, "learning_rate": 1e-06, "loss": 0.0186, "step": 412 }, { "clip_ratio/high_max": 0.002064970671199262, "clip_ratio/high_mean": 0.0007348162689595483, "clip_ratio/low_mean": 0.0008080794541456271, "clip_ratio/low_min": 8.099741717160214e-05, "clip_ratio/region_mean": 0.0015428957085532602, "epoch": 4.270553935860058, "grad_norm": 0.13375383615493774, "learning_rate": 1e-06, "loss": 0.0094, "step": 413 }, { "clip_ratio/high_max": 0.0021592688462988008, "clip_ratio/high_mean": 0.0008135633825077093, "clip_ratio/low_mean": 0.0007649813596799504, "clip_ratio/low_min": 3.117303549515782e-05, "clip_ratio/region_mean": 0.0015785447576490697, "epoch": 4.279883381924198, "grad_norm": 0.1349503993988037, "learning_rate": 1e-06, "loss": -0.0186, "step": 414 }, { "clip_ratio/high_max": 0.0020423954192665406, "clip_ratio/high_mean": 0.0008645430953038158, "clip_ratio/low_mean": 0.0009001363887364278, "clip_ratio/low_min": 2.738825605774764e-05, "clip_ratio/region_mean": 0.00176467949495418, "epoch": 4.289212827988338, "grad_norm": 0.13946153223514557, "learning_rate": 1e-06, "loss": 0.0218, "step": 415 }, { "clip_ratio/high_max": 0.002237029701063875, "clip_ratio/high_mean": 0.000916131709345791, "clip_ratio/low_mean": 0.0009940072122844867, "clip_ratio/low_min": 6.257436416490236e-05, "clip_ratio/region_mean": 0.0019101389334537089, "epoch": 4.298542274052478, "grad_norm": 0.15886887907981873, "learning_rate": 1e-06, "loss": -0.016, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028948102678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 660.2079467773438, "completions/mean_terminated_length": 557.7832641601562, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 4.307871720116618, "grad_norm": 0.14039747416973114, "learning_rate": 1e-06, "loss": 0.0061, "num_tokens": 253119367.0, "reward": 0.622628390789032, "reward_std": 0.17434236407279968, "rewards/simpleverify_reward/mean": 0.6226283311843872, "rewards/simpleverify_reward/std": 0.48474594950675964, "step": 417 }, { "clip_ratio/high_max": 0.001721075412206119, "clip_ratio/high_mean": 0.0008207943265006179, "clip_ratio/low_mean": 0.0005281883873067272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001348982725176029, "epoch": 4.317201166180758, "grad_norm": 0.1351977288722992, "learning_rate": 1e-06, "loss": -0.0326, "step": 418 }, { "clip_ratio/high_max": 0.0017618036181374919, "clip_ratio/high_mean": 0.0007112743187462911, "clip_ratio/low_mean": 0.0004981588258488046, "clip_ratio/low_min": 1.1026817446690984e-05, "clip_ratio/region_mean": 0.0012094331286789384, "epoch": 4.326530612244898, "grad_norm": 0.14089015126228333, "learning_rate": 1e-06, "loss": -0.0147, "step": 419 }, { "clip_ratio/high_max": 0.0018188980175182223, "clip_ratio/high_mean": 0.00068837766048091, "clip_ratio/low_mean": 0.0006974110474402551, "clip_ratio/low_min": 6.540434060298139e-05, "clip_ratio/region_mean": 0.0013857886733603664, "epoch": 4.335860058309038, "grad_norm": 0.1428503394126892, "learning_rate": 1e-06, "loss": 0.0438, "step": 420 }, { "clip_ratio/high_max": 0.0023632534575881436, "clip_ratio/high_mean": 0.0008868069380696397, "clip_ratio/low_mean": 0.0005782373882539105, "clip_ratio/low_min": 4.339197948866058e-05, "clip_ratio/region_mean": 0.0014650443154096138, "epoch": 4.345189504373177, "grad_norm": 0.14213694632053375, "learning_rate": 1e-06, "loss": -0.0502, "step": 421 }, { "clip_ratio/high_max": 0.001997954554099124, "clip_ratio/high_mean": 0.0007778162162139779, "clip_ratio/low_mean": 0.0006084981364438136, "clip_ratio/low_min": 1.3496005522029009e-05, "clip_ratio/region_mean": 0.001386314324918203, "epoch": 4.354518950437318, "grad_norm": 0.14458414912223816, "learning_rate": 1e-06, "loss": 0.0059, "step": 422 }, { "clip_ratio/high_max": 0.002282510933582671, "clip_ratio/high_mean": 0.0008797499012871413, "clip_ratio/low_mean": 0.0007325051483348943, "clip_ratio/low_min": 8.304211678478168e-05, "clip_ratio/region_mean": 0.001612254993233364, "epoch": 4.363848396501457, "grad_norm": 0.1361098289489746, "learning_rate": 1e-06, "loss": 0.0166, "step": 423 }, { "clip_ratio/high_max": 0.0021464546589413658, "clip_ratio/high_mean": 0.0009247475536540151, "clip_ratio/low_mean": 0.0007555866613984108, "clip_ratio/low_min": 1.4501159967039712e-05, "clip_ratio/region_mean": 0.0016803342005005106, "epoch": 4.373177842565598, "grad_norm": 0.13864639401435852, "learning_rate": 1e-06, "loss": -0.0638, "step": 424 }, { "clip_ratio/high_max": 0.0020798289515369106, "clip_ratio/high_mean": 0.0007785744546708884, "clip_ratio/low_mean": 0.0006930821127753006, "clip_ratio/low_min": 1.088281351258047e-05, "clip_ratio/region_mean": 0.0014716565528942738, "epoch": 4.382507288629737, "grad_norm": 0.13775387406349182, "learning_rate": 1e-06, "loss": 0.0151, "step": 425 }, { "clip_ratio/high_max": 0.0022037633825675584, "clip_ratio/high_mean": 0.0008594255996285938, "clip_ratio/low_mean": 0.0007563193266832968, "clip_ratio/low_min": 1.1929757420148235e-05, "clip_ratio/region_mean": 0.0016157449208549224, "epoch": 4.391836734693878, "grad_norm": 0.13735786080360413, "learning_rate": 1e-06, "loss": -0.0284, "step": 426 }, { "clip_ratio/high_max": 0.0025244092830689624, "clip_ratio/high_mean": 0.0009236256264557596, "clip_ratio/low_mean": 0.0007384982072835555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001662123828282347, "epoch": 4.401166180758017, "grad_norm": 0.1344793289899826, "learning_rate": 1e-06, "loss": -0.0133, "step": 427 }, { "clip_ratio/high_max": 0.001960578767466359, "clip_ratio/high_mean": 0.0008293927821796387, "clip_ratio/low_mean": 0.0007592314559587976, "clip_ratio/low_min": 1.1759172593883704e-05, "clip_ratio/region_mean": 0.0015886242072156165, "epoch": 4.410495626822158, "grad_norm": 0.14428822696208954, "learning_rate": 1e-06, "loss": 0.0182, "step": 428 }, { "clip_ratio/high_max": 0.0017354351984977257, "clip_ratio/high_mean": 0.0007769427083985647, "clip_ratio/low_mean": 0.000884910741206113, "clip_ratio/low_min": 0.00010683977052394766, "clip_ratio/region_mean": 0.0016618534427834675, "epoch": 4.419825072886297, "grad_norm": 0.14362578094005585, "learning_rate": 1e-06, "loss": 0.0018, "step": 429 }, { "clip_ratio/high_max": 0.002135111062671058, "clip_ratio/high_mean": 0.0008419767727900762, "clip_ratio/low_mean": 0.0009014563711389201, "clip_ratio/low_min": 6.603633300983347e-05, "clip_ratio/region_mean": 0.0017434331457479857, "epoch": 4.429154518950437, "grad_norm": 0.14543834328651428, "learning_rate": 1e-06, "loss": 0.0292, "step": 430 }, { "clip_ratio/high_max": 0.002382848622801248, "clip_ratio/high_mean": 0.0010337124149373267, "clip_ratio/low_mean": 0.0009170528774120612, "clip_ratio/low_min": 2.4930195650085807e-05, "clip_ratio/region_mean": 0.0019507653269101866, "epoch": 4.438483965014577, "grad_norm": 0.13290520012378693, "learning_rate": 1e-06, "loss": -0.0074, "step": 431 }, { "clip_ratio/high_max": 0.0020464904300752096, "clip_ratio/high_mean": 0.000794947378381039, "clip_ratio/low_mean": 0.0009633779918658547, "clip_ratio/low_min": 6.955102981009986e-05, "clip_ratio/region_mean": 0.0017583253902557772, "epoch": 4.447813411078717, "grad_norm": 0.13515394926071167, "learning_rate": 1e-06, "loss": 0.0327, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4042.0, "completions/mean_length": 659.9826049804688, "completions/mean_terminated_length": 557.2969970703125, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 4.457142857142857, "grad_norm": 0.13635966181755066, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 262284637.0, "reward": 0.6302316188812256, "reward_std": 0.16895326972007751, "rewards/simpleverify_reward/mean": 0.6302315592765808, "rewards/simpleverify_reward/std": 0.4827587604522705, "step": 433 }, { "clip_ratio/high_max": 0.0018946665004477836, "clip_ratio/high_mean": 0.0008593821039539762, "clip_ratio/low_mean": 0.0004978385914000683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013572206989920232, "epoch": 4.466472303206997, "grad_norm": 0.14646746218204498, "learning_rate": 1e-06, "loss": -0.0406, "step": 434 }, { "clip_ratio/high_max": 0.00181792874718667, "clip_ratio/high_mean": 0.0006722836169501534, "clip_ratio/low_mean": 0.000585532985496684, "clip_ratio/low_min": 2.3423782295139972e-05, "clip_ratio/region_mean": 0.0012578166060848162, "epoch": 4.475801749271137, "grad_norm": 0.14329630136489868, "learning_rate": 1e-06, "loss": 0.0404, "step": 435 }, { "clip_ratio/high_max": 0.0018812880844052415, "clip_ratio/high_mean": 0.0006947350411792286, "clip_ratio/low_mean": 0.0004510954686338664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011458305089036003, "epoch": 4.485131195335277, "grad_norm": 0.14388902485370636, "learning_rate": 1e-06, "loss": -0.0306, "step": 436 }, { "clip_ratio/high_max": 0.0019834737795463298, "clip_ratio/high_mean": 0.0008112781524687307, "clip_ratio/low_mean": 0.0005033688348703436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013146469827916007, "epoch": 4.494460641399417, "grad_norm": 0.13579969108104706, "learning_rate": 1e-06, "loss": -0.0359, "step": 437 }, { "clip_ratio/high_max": 0.0018941608432214707, "clip_ratio/high_mean": 0.0007409790268866345, "clip_ratio/low_mean": 0.0006190713393152691, "clip_ratio/low_min": 2.7533040338312276e-05, "clip_ratio/region_mean": 0.0013600503880297765, "epoch": 4.503790087463557, "grad_norm": 0.14690448343753815, "learning_rate": 1e-06, "loss": 0.0234, "step": 438 }, { "clip_ratio/high_max": 0.001990746644878527, "clip_ratio/high_mean": 0.000725812202290399, "clip_ratio/low_mean": 0.000508296218868054, "clip_ratio/low_min": 1.6473379218950868e-05, "clip_ratio/region_mean": 0.0012341084257059265, "epoch": 4.513119533527696, "grad_norm": 0.1355036348104477, "learning_rate": 1e-06, "loss": -0.0082, "step": 439 }, { "clip_ratio/high_max": 0.002018020059040282, "clip_ratio/high_mean": 0.0007833459494577255, "clip_ratio/low_mean": 0.0005103191670059459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001293665114644682, "epoch": 4.522448979591837, "grad_norm": 0.13605257868766785, "learning_rate": 1e-06, "loss": -0.0336, "step": 440 }, { "clip_ratio/high_max": 0.0016423587112512905, "clip_ratio/high_mean": 0.0007220258230518084, "clip_ratio/low_mean": 0.0005902602279093117, "clip_ratio/low_min": 1.0858235327759758e-05, "clip_ratio/region_mean": 0.0013122860655130353, "epoch": 4.531778425655976, "grad_norm": 0.12736156582832336, "learning_rate": 1e-06, "loss": -0.013, "step": 441 }, { "clip_ratio/high_max": 0.0019386203784961253, "clip_ratio/high_mean": 0.0007939001970953541, "clip_ratio/low_mean": 0.0007155664752644952, "clip_ratio/low_min": 2.2194602934177965e-05, "clip_ratio/region_mean": 0.0015094666741788387, "epoch": 4.541107871720117, "grad_norm": 0.1547626256942749, "learning_rate": 1e-06, "loss": 0.002, "step": 442 }, { "clip_ratio/high_max": 0.0022937585308682173, "clip_ratio/high_mean": 0.0008172079087671591, "clip_ratio/low_mean": 0.0007018830933702702, "clip_ratio/low_min": 1.9737881302717142e-05, "clip_ratio/region_mean": 0.001519091019872576, "epoch": 4.550437317784256, "grad_norm": 0.13925854861736298, "learning_rate": 1e-06, "loss": -0.0359, "step": 443 }, { "clip_ratio/high_max": 0.001896369500173023, "clip_ratio/high_mean": 0.000751575516005687, "clip_ratio/low_mean": 0.0007698241170146503, "clip_ratio/low_min": 6.108340585342376e-05, "clip_ratio/region_mean": 0.0015213996157399379, "epoch": 4.559766763848397, "grad_norm": 0.12260023504495621, "learning_rate": 1e-06, "loss": -0.0122, "step": 444 }, { "clip_ratio/high_max": 0.0021567277362919413, "clip_ratio/high_mean": 0.000779573656473076, "clip_ratio/low_mean": 0.0008380481322092237, "clip_ratio/low_min": 0.00012170905574748758, "clip_ratio/region_mean": 0.001617621754121501, "epoch": 4.569096209912536, "grad_norm": 0.15853457152843475, "learning_rate": 1e-06, "loss": 0.0377, "step": 445 }, { "clip_ratio/high_max": 0.0019760520044656005, "clip_ratio/high_mean": 0.0007717506468907231, "clip_ratio/low_mean": 0.0008252071784227155, "clip_ratio/low_min": 4.372414150566328e-05, "clip_ratio/region_mean": 0.0015969578707881738, "epoch": 4.578425655976677, "grad_norm": 0.13059084117412567, "learning_rate": 1e-06, "loss": 0.0324, "step": 446 }, { "clip_ratio/high_max": 0.0022495392622658983, "clip_ratio/high_mean": 0.000840968152260757, "clip_ratio/low_mean": 0.0009696445149529609, "clip_ratio/low_min": 2.541683534218464e-05, "clip_ratio/region_mean": 0.001810612651752308, "epoch": 4.587755102040816, "grad_norm": 0.14084604382514954, "learning_rate": 1e-06, "loss": 0.0181, "step": 447 }, { "clip_ratio/high_max": 0.0020552932037389837, "clip_ratio/high_mean": 0.0008751566110731801, "clip_ratio/low_mean": 0.0007946431469463278, "clip_ratio/low_min": 3.263377948314883e-05, "clip_ratio/region_mean": 0.0016697997598384973, "epoch": 4.597084548104956, "grad_norm": 0.1383851319551468, "learning_rate": 1e-06, "loss": -0.0004, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0326450892857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 676.7257080078125, "completions/mean_terminated_length": 561.3362426757812, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 4.606413994169096, "grad_norm": 0.15614677965641022, "learning_rate": 1e-06, "loss": 0.0497, "num_tokens": 271523112.0, "reward": 0.6095842719078064, "reward_std": 0.17822805047035217, "rewards/simpleverify_reward/mean": 0.6095842719078064, "rewards/simpleverify_reward/std": 0.4878605306148529, "step": 449 }, { "clip_ratio/high_max": 0.0017971346715057734, "clip_ratio/high_mean": 0.0006697959524899488, "clip_ratio/low_mean": 0.0005843023595843988, "clip_ratio/low_min": 4.758385694003664e-05, "clip_ratio/region_mean": 0.001254098297067685, "epoch": 4.615743440233236, "grad_norm": 0.15114061534404755, "learning_rate": 1e-06, "loss": -0.0074, "step": 450 }, { "clip_ratio/high_max": 0.0019544661809050012, "clip_ratio/high_mean": 0.0007528217502112966, "clip_ratio/low_mean": 0.0006265363108468591, "clip_ratio/low_min": 5.296774907037616e-05, "clip_ratio/region_mean": 0.0013793580437777564, "epoch": 4.625072886297376, "grad_norm": 0.1447070837020874, "learning_rate": 1e-06, "loss": -0.0251, "step": 451 }, { "clip_ratio/high_max": 0.001886899299279321, "clip_ratio/high_mean": 0.000755733877667808, "clip_ratio/low_mean": 0.0005922255013501854, "clip_ratio/low_min": 2.2706630261382088e-05, "clip_ratio/region_mean": 0.0013479594163072761, "epoch": 4.634402332361516, "grad_norm": 0.14599715173244476, "learning_rate": 1e-06, "loss": -0.0268, "step": 452 }, { "clip_ratio/high_max": 0.002206432865932584, "clip_ratio/high_mean": 0.0008385329201701097, "clip_ratio/low_mean": 0.0006779280411137734, "clip_ratio/low_min": 1.4137072867015377e-05, "clip_ratio/region_mean": 0.0015164609576459043, "epoch": 4.643731778425656, "grad_norm": 0.1516730785369873, "learning_rate": 1e-06, "loss": -0.0061, "step": 453 }, { "clip_ratio/high_max": 0.0017078085256798659, "clip_ratio/high_mean": 0.0006799944148951909, "clip_ratio/low_mean": 0.0006824211868661223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013624155981233343, "epoch": 4.653061224489796, "grad_norm": 0.14161062240600586, "learning_rate": 1e-06, "loss": -0.0093, "step": 454 }, { "clip_ratio/high_max": 0.0019095331663265824, "clip_ratio/high_mean": 0.0007660757091798587, "clip_ratio/low_mean": 0.000732026786863571, "clip_ratio/low_min": 1.3751375263382215e-05, "clip_ratio/region_mean": 0.0014981024833105039, "epoch": 4.662390670553936, "grad_norm": 0.13042862713336945, "learning_rate": 1e-06, "loss": -0.0212, "step": 455 }, { "clip_ratio/high_max": 0.002017461316427216, "clip_ratio/high_mean": 0.0008329216288984753, "clip_ratio/low_mean": 0.0007611829987581586, "clip_ratio/low_min": 7.003199425525963e-05, "clip_ratio/region_mean": 0.0015941046585794538, "epoch": 4.671720116618076, "grad_norm": 0.1487365961074829, "learning_rate": 1e-06, "loss": -0.0368, "step": 456 }, { "clip_ratio/high_max": 0.0016639782079437282, "clip_ratio/high_mean": 0.0007071169675327837, "clip_ratio/low_mean": 0.0008511025225743651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015582194828311913, "epoch": 4.681049562682215, "grad_norm": 0.14309902489185333, "learning_rate": 1e-06, "loss": 0.0416, "step": 457 }, { "clip_ratio/high_max": 0.002048715963610448, "clip_ratio/high_mean": 0.0008234525284933625, "clip_ratio/low_mean": 0.0007788235052430537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016022760501073208, "epoch": 4.690379008746356, "grad_norm": 0.1428535282611847, "learning_rate": 1e-06, "loss": -0.0129, "step": 458 }, { "clip_ratio/high_max": 0.001936749518790748, "clip_ratio/high_mean": 0.0008058685962168965, "clip_ratio/low_mean": 0.0007987127919477643, "clip_ratio/low_min": 3.374441439518705e-05, "clip_ratio/region_mean": 0.0016045813790697139, "epoch": 4.699708454810495, "grad_norm": 0.1306166648864746, "learning_rate": 1e-06, "loss": 0.0131, "step": 459 }, { "clip_ratio/high_max": 0.0026468644173291977, "clip_ratio/high_mean": 0.0010301840393367456, "clip_ratio/low_mean": 0.0008422299579251558, "clip_ratio/low_min": 0.00011694230033754138, "clip_ratio/region_mean": 0.001872413995442912, "epoch": 4.709037900874636, "grad_norm": 0.14510478079319, "learning_rate": 1e-06, "loss": -0.0314, "step": 460 }, { "clip_ratio/high_max": 0.0020757008678629063, "clip_ratio/high_mean": 0.0008239636626967695, "clip_ratio/low_mean": 0.0008433632101514377, "clip_ratio/low_min": 3.7991950193827506e-05, "clip_ratio/region_mean": 0.001667326876486186, "epoch": 4.718367346938775, "grad_norm": 0.14492084085941315, "learning_rate": 1e-06, "loss": -0.0, "step": 461 }, { "clip_ratio/high_max": 0.0020391212710819673, "clip_ratio/high_mean": 0.0007921024025563383, "clip_ratio/low_mean": 0.0009094626466321643, "clip_ratio/low_min": 0.00014627889140683692, "clip_ratio/region_mean": 0.001701565066468902, "epoch": 4.727696793002916, "grad_norm": 0.1458117961883545, "learning_rate": 1e-06, "loss": 0.0272, "step": 462 }, { "clip_ratio/high_max": 0.002088821493089199, "clip_ratio/high_mean": 0.0008896802719391417, "clip_ratio/low_mean": 0.0008033615413296502, "clip_ratio/low_min": 0.00014060549710848136, "clip_ratio/region_mean": 0.0016930418059928343, "epoch": 4.737026239067055, "grad_norm": 0.13169211149215698, "learning_rate": 1e-06, "loss": -0.0401, "step": 463 }, { "clip_ratio/high_max": 0.0019473216962069273, "clip_ratio/high_mean": 0.0008602636225987226, "clip_ratio/low_mean": 0.0008712820781511255, "clip_ratio/low_min": 4.82867344544502e-05, "clip_ratio/region_mean": 0.0017315457334916573, "epoch": 4.746355685131196, "grad_norm": 0.14157073199748993, "learning_rate": 1e-06, "loss": 0.012, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0331333705357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 673.0486450195312, "completions/mean_terminated_length": 555.7481689453125, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 4.755685131195335, "grad_norm": 0.15441399812698364, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 280664345.0, "reward": 0.6179548501968384, "reward_std": 0.1670873612165451, "rewards/simpleverify_reward/mean": 0.6179547905921936, "rewards/simpleverify_reward/std": 0.48590442538261414, "step": 465 }, { "clip_ratio/high_max": 0.0019839676351693925, "clip_ratio/high_mean": 0.0007772995095365332, "clip_ratio/low_mean": 0.0005723586336898734, "clip_ratio/low_min": 2.061345730908215e-05, "clip_ratio/region_mean": 0.0013496581559593324, "epoch": 4.765014577259475, "grad_norm": 0.14397689700126648, "learning_rate": 1e-06, "loss": -0.0266, "step": 466 }, { "clip_ratio/high_max": 0.0018960342422360554, "clip_ratio/high_mean": 0.0007571441110485466, "clip_ratio/low_mean": 0.0005251715583654004, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012823156503145583, "epoch": 4.774344023323615, "grad_norm": 0.1473805010318756, "learning_rate": 1e-06, "loss": -0.0024, "step": 467 }, { "clip_ratio/high_max": 0.0018656092433957383, "clip_ratio/high_mean": 0.000784263023888343, "clip_ratio/low_mean": 0.0005751584722020198, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013594214979093522, "epoch": 4.783673469387755, "grad_norm": 0.14232085645198822, "learning_rate": 1e-06, "loss": -0.0124, "step": 468 }, { "clip_ratio/high_max": 0.0018197209628851851, "clip_ratio/high_mean": 0.0007109072048478993, "clip_ratio/low_mean": 0.0005875895758435945, "clip_ratio/low_min": 3.3223347600142006e-05, "clip_ratio/region_mean": 0.0012984968234377448, "epoch": 4.793002915451895, "grad_norm": 0.1366540789604187, "learning_rate": 1e-06, "loss": 0.0244, "step": 469 }, { "clip_ratio/high_max": 0.0017138488692580722, "clip_ratio/high_mean": 0.0006751595956302481, "clip_ratio/low_mean": 0.0005706638767151162, "clip_ratio/low_min": 2.448298346280353e-05, "clip_ratio/region_mean": 0.0012458235032681841, "epoch": 4.802332361516035, "grad_norm": 0.14701710641384125, "learning_rate": 1e-06, "loss": 0.0255, "step": 470 }, { "clip_ratio/high_max": 0.0015790617180755362, "clip_ratio/high_mean": 0.000625983930149232, "clip_ratio/low_mean": 0.0007453359812643612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013713199041376356, "epoch": 4.811661807580175, "grad_norm": 0.13911397755146027, "learning_rate": 1e-06, "loss": 0.0169, "step": 471 }, { "clip_ratio/high_max": 0.002107078064000234, "clip_ratio/high_mean": 0.0008477657174807973, "clip_ratio/low_mean": 0.0006317326915450394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014794983908359427, "epoch": 4.820991253644315, "grad_norm": 0.14063839614391327, "learning_rate": 1e-06, "loss": -0.0218, "step": 472 }, { "clip_ratio/high_max": 0.0018147599548683502, "clip_ratio/high_mean": 0.0007355585403274745, "clip_ratio/low_mean": 0.0007886492912803078, "clip_ratio/low_min": 6.55787116556894e-05, "clip_ratio/region_mean": 0.001524207844340708, "epoch": 4.830320699708455, "grad_norm": 0.14481012523174286, "learning_rate": 1e-06, "loss": -0.0173, "step": 473 }, { "clip_ratio/high_max": 0.002033145268796943, "clip_ratio/high_mean": 0.0008247972655226476, "clip_ratio/low_mean": 0.0008216585920308717, "clip_ratio/low_min": 3.046569509024266e-05, "clip_ratio/region_mean": 0.0016464558502775617, "epoch": 4.839650145772595, "grad_norm": 0.15348529815673828, "learning_rate": 1e-06, "loss": -0.0427, "step": 474 }, { "clip_ratio/high_max": 0.0017868525283120107, "clip_ratio/high_mean": 0.0007209597770270193, "clip_ratio/low_mean": 0.0007241272760438733, "clip_ratio/low_min": 9.21557057154132e-06, "clip_ratio/region_mean": 0.001445087054889882, "epoch": 4.848979591836734, "grad_norm": 0.13291506469249725, "learning_rate": 1e-06, "loss": 0.0005, "step": 475 }, { "clip_ratio/high_max": 0.0019011422773473896, "clip_ratio/high_mean": 0.0007779122151987394, "clip_ratio/low_mean": 0.0007664663953619311, "clip_ratio/low_min": 5.748777221015189e-05, "clip_ratio/region_mean": 0.0015443786178366281, "epoch": 4.858309037900875, "grad_norm": 0.1459950953722, "learning_rate": 1e-06, "loss": -0.0379, "step": 476 }, { "clip_ratio/high_max": 0.0019103054073639214, "clip_ratio/high_mean": 0.0008094209406408481, "clip_ratio/low_mean": 0.0008170108339982107, "clip_ratio/low_min": 1.4730143448105082e-05, "clip_ratio/region_mean": 0.001626431789190974, "epoch": 4.867638483965014, "grad_norm": 0.13664455711841583, "learning_rate": 1e-06, "loss": -0.029, "step": 477 }, { "clip_ratio/high_max": 0.0019490873091854155, "clip_ratio/high_mean": 0.0008089427301456453, "clip_ratio/low_mean": 0.000891122777829878, "clip_ratio/low_min": 4.885913585894741e-05, "clip_ratio/region_mean": 0.001700065542536322, "epoch": 4.876967930029155, "grad_norm": 0.15025298297405243, "learning_rate": 1e-06, "loss": 0.0013, "step": 478 }, { "clip_ratio/high_max": 0.0018821940393536352, "clip_ratio/high_mean": 0.0007020293596724514, "clip_ratio/low_mean": 0.000832925161375897, "clip_ratio/low_min": 3.9825827116146684e-05, "clip_ratio/region_mean": 0.0015349545392382424, "epoch": 4.886297376093294, "grad_norm": 0.13251133263111115, "learning_rate": 1e-06, "loss": 0.0043, "step": 479 }, { "clip_ratio/high_max": 0.0020220892583893146, "clip_ratio/high_mean": 0.0007700880632910412, "clip_ratio/low_mean": 0.0008671174891787814, "clip_ratio/low_min": 5.373757812776603e-05, "clip_ratio/region_mean": 0.0016372055251849815, "epoch": 4.895626822157435, "grad_norm": 0.13826870918273926, "learning_rate": 1e-06, "loss": 0.0054, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0322265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 672.4264526367188, "completions/mean_terminated_length": 558.4224243164062, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 5.0093294460641395, "grad_norm": 0.13976632058620453, "learning_rate": 1e-06, "loss": 0.0241, "num_tokens": 289816498.0, "reward": 0.6243025064468384, "reward_std": 0.1592639982700348, "rewards/simpleverify_reward/mean": 0.6243024468421936, "rewards/simpleverify_reward/std": 0.48431938886642456, "step": 481 }, { "clip_ratio/high_max": 0.0016235597504419275, "clip_ratio/high_mean": 0.0005538976256502792, "clip_ratio/low_mean": 0.0005086276605652529, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010625252816680586, "epoch": 5.01865889212828, "grad_norm": 0.13736383616924286, "learning_rate": 1e-06, "loss": 0.0464, "step": 482 }, { "clip_ratio/high_max": 0.0016724231609259732, "clip_ratio/high_mean": 0.0007470945565728471, "clip_ratio/low_mean": 0.00048790406799525954, "clip_ratio/low_min": 1.5277439160854556e-05, "clip_ratio/region_mean": 0.0012349986209301278, "epoch": 5.0279883381924195, "grad_norm": 0.14073507487773895, "learning_rate": 1e-06, "loss": 0.001, "step": 483 }, { "clip_ratio/high_max": 0.00203435913863359, "clip_ratio/high_mean": 0.0007553153609478613, "clip_ratio/low_mean": 0.0005783905253338162, "clip_ratio/low_min": 1.0118180398421828e-05, "clip_ratio/region_mean": 0.0013337058735487517, "epoch": 5.03731778425656, "grad_norm": 0.1481626182794571, "learning_rate": 1e-06, "loss": -0.0019, "step": 484 }, { "clip_ratio/high_max": 0.001666707332333317, "clip_ratio/high_mean": 0.0006492089587482042, "clip_ratio/low_mean": 0.0005441790790428058, "clip_ratio/low_min": 1.4175549949868582e-05, "clip_ratio/region_mean": 0.0011933880305150524, "epoch": 5.0466472303206995, "grad_norm": 0.13227275013923645, "learning_rate": 1e-06, "loss": 0.0066, "step": 485 }, { "clip_ratio/high_max": 0.0015763674964546226, "clip_ratio/high_mean": 0.0005889023286727024, "clip_ratio/low_mean": 0.0005890971715416526, "clip_ratio/low_min": 1.2432862604327966e-05, "clip_ratio/region_mean": 0.0011779995147662703, "epoch": 5.05597667638484, "grad_norm": 0.13640308380126953, "learning_rate": 1e-06, "loss": 0.0229, "step": 486 }, { "clip_ratio/high_max": 0.0019739524432225153, "clip_ratio/high_mean": 0.0007908672778285109, "clip_ratio/low_mean": 0.000537035165962152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013279024715302512, "epoch": 5.0653061224489795, "grad_norm": 0.1450178176164627, "learning_rate": 1e-06, "loss": -0.0559, "step": 487 }, { "clip_ratio/high_max": 0.0020815997049794532, "clip_ratio/high_mean": 0.0008869911271176534, "clip_ratio/low_mean": 0.0006001236961310497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014871147941448726, "epoch": 5.07463556851312, "grad_norm": 0.13260257244110107, "learning_rate": 1e-06, "loss": -0.0252, "step": 488 }, { "clip_ratio/high_max": 0.0018326759018236771, "clip_ratio/high_mean": 0.000703425186657114, "clip_ratio/low_mean": 0.0006060886353225214, "clip_ratio/low_min": 4.160383741691476e-05, "clip_ratio/region_mean": 0.0013095138274366036, "epoch": 5.0839650145772595, "grad_norm": 0.13075664639472961, "learning_rate": 1e-06, "loss": -0.037, "step": 489 }, { "clip_ratio/high_max": 0.0018462458265275927, "clip_ratio/high_mean": 0.000595511740357324, "clip_ratio/low_mean": 0.0007356997593888082, "clip_ratio/low_min": 9.097525435208809e-06, "clip_ratio/region_mean": 0.0013312115297594573, "epoch": 5.093294460641399, "grad_norm": 0.4296455979347229, "learning_rate": 1e-06, "loss": 0.0217, "step": 490 }, { "clip_ratio/high_max": 0.0018608229693199974, "clip_ratio/high_mean": 0.0007551650514869834, "clip_ratio/low_mean": 0.0007764690744807012, "clip_ratio/low_min": 1.573118606756907e-05, "clip_ratio/region_mean": 0.0015316341377911158, "epoch": 5.1026239067055394, "grad_norm": 0.15017586946487427, "learning_rate": 1e-06, "loss": -0.0012, "step": 491 }, { "clip_ratio/high_max": 0.0018718126193562057, "clip_ratio/high_mean": 0.0007299994958884781, "clip_ratio/low_mean": 0.0007160967443269328, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014460962374869268, "epoch": 5.111953352769679, "grad_norm": 0.1509178876876831, "learning_rate": 1e-06, "loss": 0.0181, "step": 492 }, { "clip_ratio/high_max": 0.0019592590469983406, "clip_ratio/high_mean": 0.0007759682484902442, "clip_ratio/low_mean": 0.0008280744914372917, "clip_ratio/low_min": 4.67232430310105e-05, "clip_ratio/region_mean": 0.0016040426853578538, "epoch": 5.121282798833819, "grad_norm": 0.1408454179763794, "learning_rate": 1e-06, "loss": -0.0118, "step": 493 }, { "clip_ratio/high_max": 0.0020561928904498927, "clip_ratio/high_mean": 0.0008469525710097514, "clip_ratio/low_mean": 0.000798242390374071, "clip_ratio/low_min": 2.9804483347106725e-05, "clip_ratio/region_mean": 0.0016451949559268542, "epoch": 5.130612244897959, "grad_norm": 0.1301165372133255, "learning_rate": 1e-06, "loss": -0.0356, "step": 494 }, { "clip_ratio/high_max": 0.0022841512982267886, "clip_ratio/high_mean": 0.0009503140099695884, "clip_ratio/low_mean": 0.0008428840956185013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017931981346919201, "epoch": 5.139941690962099, "grad_norm": 0.14305350184440613, "learning_rate": 1e-06, "loss": -0.0309, "step": 495 }, { "clip_ratio/high_max": 0.0017077335905923974, "clip_ratio/high_mean": 0.0008324627651745686, "clip_ratio/low_mean": 0.0007369823670160258, "clip_ratio/low_min": 8.02285303507233e-05, "clip_ratio/region_mean": 0.0015694451940362342, "epoch": 5.149271137026239, "grad_norm": 0.13594989478588104, "learning_rate": 1e-06, "loss": -0.0262, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0385044642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 703.8229370117188, "completions/mean_terminated_length": 567.9783325195312, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 5.158600583090379, "grad_norm": 0.1538778692483902, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 299066719.0, "reward": 0.619698703289032, "reward_std": 0.17269377410411835, "rewards/simpleverify_reward/mean": 0.6196986436843872, "rewards/simpleverify_reward/std": 0.4854777753353119, "step": 497 }, { "clip_ratio/high_max": 0.001949931145645678, "clip_ratio/high_mean": 0.0007668700400245143, "clip_ratio/low_mean": 0.0005396453198045492, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013065153725619894, "epoch": 5.167930029154519, "grad_norm": 0.13567812740802765, "learning_rate": 1e-06, "loss": 0.0046, "step": 498 }, { "clip_ratio/high_max": 0.001670276076765731, "clip_ratio/high_mean": 0.0006760883561582887, "clip_ratio/low_mean": 0.0005455454756884137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012216338363941759, "epoch": 5.1772594752186585, "grad_norm": 0.13104479014873505, "learning_rate": 1e-06, "loss": 0.0077, "step": 499 }, { "clip_ratio/high_max": 0.0016863510682014748, "clip_ratio/high_mean": 0.0007026087519079738, "clip_ratio/low_mean": 0.0005913464883633424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012939552470925264, "epoch": 5.186588921282799, "grad_norm": 0.1508387327194214, "learning_rate": 1e-06, "loss": 0.0159, "step": 500 }, { "clip_ratio/high_max": 0.0016509741071786266, "clip_ratio/high_mean": 0.0007051424054225208, "clip_ratio/low_mean": 0.0005549897850869456, "clip_ratio/low_min": 1.7898053556564264e-05, "clip_ratio/region_mean": 0.0012601321941474453, "epoch": 5.1959183673469385, "grad_norm": 0.14414073526859283, "learning_rate": 1e-06, "loss": 0.0017, "step": 501 }, { "clip_ratio/high_max": 0.0017127286992035806, "clip_ratio/high_mean": 0.0006916167603776557, "clip_ratio/low_mean": 0.0006156166637083516, "clip_ratio/low_min": 2.8004478735965677e-05, "clip_ratio/region_mean": 0.001307233418629039, "epoch": 5.205247813411079, "grad_norm": 0.1383138746023178, "learning_rate": 1e-06, "loss": 0.0038, "step": 502 }, { "clip_ratio/high_max": 0.001998745930904988, "clip_ratio/high_mean": 0.0007682386221858906, "clip_ratio/low_mean": 0.0006688440753350733, "clip_ratio/low_min": 2.9862944757041987e-05, "clip_ratio/region_mean": 0.0014370827193488367, "epoch": 5.214577259475218, "grad_norm": 0.15552794933319092, "learning_rate": 1e-06, "loss": 0.0102, "step": 503 }, { "clip_ratio/high_max": 0.002330486495338846, "clip_ratio/high_mean": 0.0009034157546921051, "clip_ratio/low_mean": 0.0005751367898483295, "clip_ratio/low_min": 1.2823143151763361e-05, "clip_ratio/region_mean": 0.0014785525163460989, "epoch": 5.223906705539359, "grad_norm": 0.14524514973163605, "learning_rate": 1e-06, "loss": 0.0069, "step": 504 }, { "clip_ratio/high_max": 0.0020312029919296037, "clip_ratio/high_mean": 0.0008479661009914707, "clip_ratio/low_mean": 0.0007643786684639053, "clip_ratio/low_min": 2.9413647098408546e-05, "clip_ratio/region_mean": 0.0016123447312565986, "epoch": 5.233236151603498, "grad_norm": 0.13514263927936554, "learning_rate": 1e-06, "loss": -0.0209, "step": 505 }, { "clip_ratio/high_max": 0.001967790878552478, "clip_ratio/high_mean": 0.0007158497228374472, "clip_ratio/low_mean": 0.0007350029354711296, "clip_ratio/low_min": 1.1409273611207027e-05, "clip_ratio/region_mean": 0.0014508526583085768, "epoch": 5.242565597667639, "grad_norm": 0.13711833953857422, "learning_rate": 1e-06, "loss": 0.0007, "step": 506 }, { "clip_ratio/high_max": 0.0023395628013531677, "clip_ratio/high_mean": 0.0010446385240356904, "clip_ratio/low_mean": 0.0005814382466269308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016260767661151476, "epoch": 5.251895043731778, "grad_norm": 0.14093568921089172, "learning_rate": 1e-06, "loss": -0.12, "step": 507 }, { "clip_ratio/high_max": 0.0020162748769507743, "clip_ratio/high_mean": 0.0008477185810988885, "clip_ratio/low_mean": 0.0007300781835510861, "clip_ratio/low_min": 4.7455998355872e-05, "clip_ratio/region_mean": 0.001577796741912607, "epoch": 5.261224489795918, "grad_norm": 0.14306123554706573, "learning_rate": 1e-06, "loss": -0.0121, "step": 508 }, { "clip_ratio/high_max": 0.002668672656000126, "clip_ratio/high_mean": 0.0010387697402620688, "clip_ratio/low_mean": 0.0007301300338440342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00176889978320105, "epoch": 5.270553935860058, "grad_norm": 0.13781116902828217, "learning_rate": 1e-06, "loss": -0.057, "step": 509 }, { "clip_ratio/high_max": 0.0019868202398356516, "clip_ratio/high_mean": 0.0008478321542497724, "clip_ratio/low_mean": 0.0008573595951020252, "clip_ratio/low_min": 1.8562519471743144e-05, "clip_ratio/region_mean": 0.0017051917748176493, "epoch": 5.279883381924198, "grad_norm": 0.140810027718544, "learning_rate": 1e-06, "loss": 0.0205, "step": 510 }, { "clip_ratio/high_max": 0.002324477449292317, "clip_ratio/high_mean": 0.0009168664564640494, "clip_ratio/low_mean": 0.0008821229748718906, "clip_ratio/low_min": 8.027652711461997e-05, "clip_ratio/region_mean": 0.0017989894258789718, "epoch": 5.289212827988338, "grad_norm": 0.1484432965517044, "learning_rate": 1e-06, "loss": 0.0125, "step": 511 }, { "clip_ratio/high_max": 0.0018135896098101512, "clip_ratio/high_mean": 0.0008001307851372985, "clip_ratio/low_mean": 0.0007855077301428537, "clip_ratio/low_min": 3.455169644439593e-05, "clip_ratio/region_mean": 0.0015856385107326787, "epoch": 5.298542274052478, "grad_norm": 0.1377202272415161, "learning_rate": 1e-06, "loss": -0.0252, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03857421875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 694.9124145507812, "completions/mean_terminated_length": 558.4542846679688, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 5.307871720116618, "grad_norm": 0.14509792625904083, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 308189559.0, "reward": 0.6257673501968384, "reward_std": 0.15382085740566254, "rewards/simpleverify_reward/mean": 0.6257672905921936, "rewards/simpleverify_reward/std": 0.48394104838371277, "step": 513 }, { "clip_ratio/high_max": 0.001831252968258923, "clip_ratio/high_mean": 0.0007021787605481222, "clip_ratio/low_mean": 0.0004104631098016398, "clip_ratio/low_min": 1.5049361536512151e-05, "clip_ratio/region_mean": 0.0011126418867206667, "epoch": 5.317201166180758, "grad_norm": 0.132576122879982, "learning_rate": 1e-06, "loss": -0.025, "step": 514 }, { "clip_ratio/high_max": 0.0019213679661334027, "clip_ratio/high_mean": 0.0007423789547829074, "clip_ratio/low_mean": 0.0005459523526951671, "clip_ratio/low_min": 1.5866971807554364e-05, "clip_ratio/region_mean": 0.0012883312774647493, "epoch": 5.326530612244898, "grad_norm": 0.13921859860420227, "learning_rate": 1e-06, "loss": -0.0552, "step": 515 }, { "clip_ratio/high_max": 0.0017202495873789303, "clip_ratio/high_mean": 0.0006510657312901458, "clip_ratio/low_mean": 0.00044724181316269096, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010983075808326248, "epoch": 5.335860058309038, "grad_norm": 0.13655169308185577, "learning_rate": 1e-06, "loss": -0.0173, "step": 516 }, { "clip_ratio/high_max": 0.0016500340389029589, "clip_ratio/high_mean": 0.0006498305519926362, "clip_ratio/low_mean": 0.0005137034022482112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011635339506028686, "epoch": 5.345189504373177, "grad_norm": 0.13832750916481018, "learning_rate": 1e-06, "loss": 0.0041, "step": 517 }, { "clip_ratio/high_max": 0.0015886834826233098, "clip_ratio/high_mean": 0.0006011972063788562, "clip_ratio/low_mean": 0.0005696543394151377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011708515503414674, "epoch": 5.354518950437318, "grad_norm": 0.12215995788574219, "learning_rate": 1e-06, "loss": 0.0041, "step": 518 }, { "clip_ratio/high_max": 0.0017466730678279418, "clip_ratio/high_mean": 0.0006969935602683108, "clip_ratio/low_mean": 0.0005868832940905122, "clip_ratio/low_min": 9.293680705013685e-06, "clip_ratio/region_mean": 0.0012838768416258972, "epoch": 5.363848396501457, "grad_norm": 0.1546134352684021, "learning_rate": 1e-06, "loss": -0.0277, "step": 519 }, { "clip_ratio/high_max": 0.0017229815366590628, "clip_ratio/high_mean": 0.0006109916248533409, "clip_ratio/low_mean": 0.0007116498654795578, "clip_ratio/low_min": 1.4057580301596317e-05, "clip_ratio/region_mean": 0.0013226414885139093, "epoch": 5.373177842565598, "grad_norm": 0.14277414977550507, "learning_rate": 1e-06, "loss": 0.0032, "step": 520 }, { "clip_ratio/high_max": 0.001673447601206135, "clip_ratio/high_mean": 0.0006610967602682649, "clip_ratio/low_mean": 0.0005499726776179159, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012110694551665802, "epoch": 5.382507288629737, "grad_norm": 0.13254442811012268, "learning_rate": 1e-06, "loss": -0.0027, "step": 521 }, { "clip_ratio/high_max": 0.0017601528816157952, "clip_ratio/high_mean": 0.0007635183646925725, "clip_ratio/low_mean": 0.0006548195724462857, "clip_ratio/low_min": 1.3997760106576607e-05, "clip_ratio/region_mean": 0.0014183379571477417, "epoch": 5.391836734693878, "grad_norm": 0.132960245013237, "learning_rate": 1e-06, "loss": -0.027, "step": 522 }, { "clip_ratio/high_max": 0.0020753175995196216, "clip_ratio/high_mean": 0.0008083586080829264, "clip_ratio/low_mean": 0.0006855880346847698, "clip_ratio/low_min": 1.8058364730677567e-05, "clip_ratio/region_mean": 0.0014939466273062862, "epoch": 5.401166180758017, "grad_norm": 0.15483099222183228, "learning_rate": 1e-06, "loss": 0.0236, "step": 523 }, { "clip_ratio/high_max": 0.001550869728816906, "clip_ratio/high_mean": 0.0006023825608281186, "clip_ratio/low_mean": 0.0007375161458185175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001339898710284615, "epoch": 5.410495626822158, "grad_norm": 0.149322509765625, "learning_rate": 1e-06, "loss": -0.0063, "step": 524 }, { "clip_ratio/high_max": 0.0019394854825804941, "clip_ratio/high_mean": 0.0007082460915626143, "clip_ratio/low_mean": 0.0007132144392016926, "clip_ratio/low_min": 1.622955096536316e-05, "clip_ratio/region_mean": 0.001421460536221275, "epoch": 5.419825072886297, "grad_norm": 0.14261841773986816, "learning_rate": 1e-06, "loss": 0.017, "step": 525 }, { "clip_ratio/high_max": 0.0018902633855759632, "clip_ratio/high_mean": 0.0007288329470611643, "clip_ratio/low_mean": 0.0007327030325541273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014615360014431644, "epoch": 5.429154518950437, "grad_norm": 0.13962894678115845, "learning_rate": 1e-06, "loss": -0.0073, "step": 526 }, { "clip_ratio/high_max": 0.0021223561525403056, "clip_ratio/high_mean": 0.0008140422578435391, "clip_ratio/low_mean": 0.0007856173879190465, "clip_ratio/low_min": 2.2486057787318714e-05, "clip_ratio/region_mean": 0.0015996596266631968, "epoch": 5.438483965014577, "grad_norm": 0.13034579157829285, "learning_rate": 1e-06, "loss": -0.0024, "step": 527 }, { "clip_ratio/high_max": 0.0019375484771444462, "clip_ratio/high_mean": 0.0007959874856169336, "clip_ratio/low_mean": 0.0007138545624911785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001509842069935985, "epoch": 5.447813411078717, "grad_norm": 0.12876982986927032, "learning_rate": 1e-06, "loss": -0.032, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0408761160714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 709.1928100585938, "completions/mean_terminated_length": 564.8532104492188, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 5.457142857142857, "grad_norm": 0.1431170105934143, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 317406035.0, "reward": 0.6169782876968384, "reward_std": 0.1713859736919403, "rewards/simpleverify_reward/mean": 0.6169782280921936, "rewards/simpleverify_reward/std": 0.48614048957824707, "step": 529 }, { "clip_ratio/high_max": 0.0019781591108767316, "clip_ratio/high_mean": 0.0007710580885031959, "clip_ratio/low_mean": 0.00048771446745377034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001258772586879786, "epoch": 5.466472303206997, "grad_norm": 0.13045582175254822, "learning_rate": 1e-06, "loss": -0.0524, "step": 530 }, { "clip_ratio/high_max": 0.001924749700265238, "clip_ratio/high_mean": 0.0007688664154557046, "clip_ratio/low_mean": 0.0004975594420102425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012664258429140318, "epoch": 5.475801749271137, "grad_norm": 0.14322207868099213, "learning_rate": 1e-06, "loss": -0.0232, "step": 531 }, { "clip_ratio/high_max": 0.002023437813477358, "clip_ratio/high_mean": 0.0007363194672507234, "clip_ratio/low_mean": 0.0005374439751903992, "clip_ratio/low_min": 1.1651752174657304e-05, "clip_ratio/region_mean": 0.0012737634315271862, "epoch": 5.485131195335277, "grad_norm": 0.1508331596851349, "learning_rate": 1e-06, "loss": 0.0085, "step": 532 }, { "clip_ratio/high_max": 0.0014870895029162057, "clip_ratio/high_mean": 0.0006016110583004775, "clip_ratio/low_mean": 0.0006343551995087182, "clip_ratio/low_min": 1.763046566338744e-05, "clip_ratio/region_mean": 0.0012359662614471745, "epoch": 5.494460641399417, "grad_norm": 0.15868335962295532, "learning_rate": 1e-06, "loss": 0.0752, "step": 533 }, { "clip_ratio/high_max": 0.002118173044436844, "clip_ratio/high_mean": 0.0008420675931120059, "clip_ratio/low_mean": 0.0004903422441202565, "clip_ratio/low_min": 1.659585723245982e-05, "clip_ratio/region_mean": 0.0013324098326847889, "epoch": 5.503790087463557, "grad_norm": 0.15196527540683746, "learning_rate": 1e-06, "loss": -0.0469, "step": 534 }, { "clip_ratio/high_max": 0.002219799360318575, "clip_ratio/high_mean": 0.0008770410513534443, "clip_ratio/low_mean": 0.0005185525460547069, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013955935937701724, "epoch": 5.513119533527696, "grad_norm": 0.15247340500354767, "learning_rate": 1e-06, "loss": -0.018, "step": 535 }, { "clip_ratio/high_max": 0.0021086786036903504, "clip_ratio/high_mean": 0.0008057452005232335, "clip_ratio/low_mean": 0.0006780390795029234, "clip_ratio/low_min": 5.02008042531088e-05, "clip_ratio/region_mean": 0.0014837842900305986, "epoch": 5.522448979591837, "grad_norm": 0.1468881070613861, "learning_rate": 1e-06, "loss": 0.0083, "step": 536 }, { "clip_ratio/high_max": 0.00240861444035545, "clip_ratio/high_mean": 0.0009942740416590823, "clip_ratio/low_mean": 0.000609717569204804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016039916154113598, "epoch": 5.531778425655976, "grad_norm": 0.1336188167333603, "learning_rate": 1e-06, "loss": -0.0596, "step": 537 }, { "clip_ratio/high_max": 0.002228997545898892, "clip_ratio/high_mean": 0.0009610133065507398, "clip_ratio/low_mean": 0.0005389498880958854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014999631966929883, "epoch": 5.541107871720117, "grad_norm": 0.14318431913852692, "learning_rate": 1e-06, "loss": -0.0738, "step": 538 }, { "clip_ratio/high_max": 0.0020502176048466936, "clip_ratio/high_mean": 0.0008949667280830909, "clip_ratio/low_mean": 0.0007913270910648862, "clip_ratio/low_min": 0.00010655957157723606, "clip_ratio/region_mean": 0.001686293846432818, "epoch": 5.550437317784256, "grad_norm": 0.15662632882595062, "learning_rate": 1e-06, "loss": 0.017, "step": 539 }, { "clip_ratio/high_max": 0.0023104576466721483, "clip_ratio/high_mean": 0.000892281628694036, "clip_ratio/low_mean": 0.0007103662865119986, "clip_ratio/low_min": 1.2773350135830697e-05, "clip_ratio/region_mean": 0.0016026479061110877, "epoch": 5.559766763848397, "grad_norm": 0.14540418982505798, "learning_rate": 1e-06, "loss": -0.0092, "step": 540 }, { "clip_ratio/high_max": 0.00204998605477158, "clip_ratio/high_mean": 0.0008966990171757061, "clip_ratio/low_mean": 0.0007506947549700271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016473937721457332, "epoch": 5.569096209912536, "grad_norm": 0.15169177949428558, "learning_rate": 1e-06, "loss": 0.0082, "step": 541 }, { "clip_ratio/high_max": 0.002146163969882764, "clip_ratio/high_mean": 0.0008893636768334545, "clip_ratio/low_mean": 0.0007449127224390395, "clip_ratio/low_min": 2.2944199372432195e-05, "clip_ratio/region_mean": 0.0016342763810826, "epoch": 5.578425655976677, "grad_norm": 0.15504685044288635, "learning_rate": 1e-06, "loss": 0.0037, "step": 542 }, { "clip_ratio/high_max": 0.0018833181820809841, "clip_ratio/high_mean": 0.0008114443662634585, "clip_ratio/low_mean": 0.0007784006666042842, "clip_ratio/low_min": 1.493072159064468e-05, "clip_ratio/region_mean": 0.0015898450146778487, "epoch": 5.587755102040816, "grad_norm": 0.14189599454402924, "learning_rate": 1e-06, "loss": -0.0043, "step": 543 }, { "clip_ratio/high_max": 0.0022297915820672642, "clip_ratio/high_mean": 0.0008458109477942344, "clip_ratio/low_mean": 0.0007284936045834911, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015743045223644003, "epoch": 5.597084548104956, "grad_norm": 0.1389007717370987, "learning_rate": 1e-06, "loss": -0.0029, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0381556919642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 687.2008666992188, "completions/mean_terminated_length": 551.9761352539062, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 5.606413994169096, "grad_norm": 0.17725898325443268, "learning_rate": 1e-06, "loss": 0.0312, "num_tokens": 326459138.0, "reward": 0.6457170844078064, "reward_std": 0.16936585307121277, "rewards/simpleverify_reward/mean": 0.6457170844078064, "rewards/simpleverify_reward/std": 0.4783121347427368, "step": 545 }, { "clip_ratio/high_max": 0.0017584522065590136, "clip_ratio/high_mean": 0.0007313398837140994, "clip_ratio/low_mean": 0.0006019550346536562, "clip_ratio/low_min": 5.4285825171973556e-05, "clip_ratio/region_mean": 0.001333294938376639, "epoch": 5.615743440233236, "grad_norm": 0.15062524378299713, "learning_rate": 1e-06, "loss": 0.0187, "step": 546 }, { "clip_ratio/high_max": 0.0018809732755471487, "clip_ratio/high_mean": 0.0007536968787462683, "clip_ratio/low_mean": 0.0004809531865248573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012346500516287051, "epoch": 5.625072886297376, "grad_norm": 0.1421685516834259, "learning_rate": 1e-06, "loss": -0.0449, "step": 547 }, { "clip_ratio/high_max": 0.00176402713987045, "clip_ratio/high_mean": 0.0006844935032859212, "clip_ratio/low_mean": 0.00042917956943711033, "clip_ratio/low_min": 2.3062730178935453e-05, "clip_ratio/region_mean": 0.0011136730900034308, "epoch": 5.634402332361516, "grad_norm": 0.15451590716838837, "learning_rate": 1e-06, "loss": 0.0185, "step": 548 }, { "clip_ratio/high_max": 0.0021395593066699803, "clip_ratio/high_mean": 0.0008088047306955559, "clip_ratio/low_mean": 0.0005819840262120124, "clip_ratio/low_min": 5.81109597987961e-05, "clip_ratio/region_mean": 0.0013907887623645365, "epoch": 5.643731778425656, "grad_norm": 0.2634983956813812, "learning_rate": 1e-06, "loss": -0.0062, "step": 549 }, { "clip_ratio/high_max": 0.0017449224906158634, "clip_ratio/high_mean": 0.0007505868597945664, "clip_ratio/low_mean": 0.0006282924532570178, "clip_ratio/low_min": 9.50714911596151e-06, "clip_ratio/region_mean": 0.0013788793476123828, "epoch": 5.653061224489796, "grad_norm": 0.14728626608848572, "learning_rate": 1e-06, "loss": -0.0277, "step": 550 }, { "clip_ratio/high_max": 0.0019871132244588807, "clip_ratio/high_mean": 0.0008166052339220187, "clip_ratio/low_mean": 0.0006633276898355689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014799329219385982, "epoch": 5.662390670553936, "grad_norm": 0.16611023247241974, "learning_rate": 1e-06, "loss": 0.0145, "step": 551 }, { "clip_ratio/high_max": 0.0020552559290081263, "clip_ratio/high_mean": 0.0008532996944268234, "clip_ratio/low_mean": 0.0005557777412832365, "clip_ratio/low_min": 9.296444659412373e-06, "clip_ratio/region_mean": 0.0014090773984207772, "epoch": 5.671720116618076, "grad_norm": 0.14784836769104004, "learning_rate": 1e-06, "loss": -0.0595, "step": 552 }, { "clip_ratio/high_max": 0.0021688662127417047, "clip_ratio/high_mean": 0.0007787061695125885, "clip_ratio/low_mean": 0.0007537616438639816, "clip_ratio/low_min": 5.050822801422328e-05, "clip_ratio/region_mean": 0.0015324678024626337, "epoch": 5.681049562682215, "grad_norm": 0.14866214990615845, "learning_rate": 1e-06, "loss": -0.0059, "step": 553 }, { "clip_ratio/high_max": 0.0018929396464955062, "clip_ratio/high_mean": 0.0007722957780060824, "clip_ratio/low_mean": 0.0006826431608715211, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014549389234161936, "epoch": 5.690379008746356, "grad_norm": 0.135148823261261, "learning_rate": 1e-06, "loss": 0.0167, "step": 554 }, { "clip_ratio/high_max": 0.001976162599021336, "clip_ratio/high_mean": 0.000854096855618991, "clip_ratio/low_mean": 0.0008752209978410974, "clip_ratio/low_min": 6.092663716117386e-05, "clip_ratio/region_mean": 0.0017293178680120036, "epoch": 5.699708454810495, "grad_norm": 0.1561211496591568, "learning_rate": 1e-06, "loss": -0.0174, "step": 555 }, { "clip_ratio/high_max": 0.002150235064618755, "clip_ratio/high_mean": 0.0009028514996316517, "clip_ratio/low_mean": 0.0007311141234822571, "clip_ratio/low_min": 4.3701636968762614e-05, "clip_ratio/region_mean": 0.0016339655376214068, "epoch": 5.709037900874636, "grad_norm": 0.14046046137809753, "learning_rate": 1e-06, "loss": -0.0139, "step": 556 }, { "clip_ratio/high_max": 0.0022956999237067066, "clip_ratio/high_mean": 0.0009544809108774643, "clip_ratio/low_mean": 0.0006733261161571136, "clip_ratio/low_min": 1.531487396277953e-05, "clip_ratio/region_mean": 0.001627807036129525, "epoch": 5.718367346938775, "grad_norm": 0.14917157590389252, "learning_rate": 1e-06, "loss": -0.0538, "step": 557 }, { "clip_ratio/high_max": 0.002084125611872878, "clip_ratio/high_mean": 0.0009248094029317144, "clip_ratio/low_mean": 0.0007497738670281251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016745832763263024, "epoch": 5.727696793002916, "grad_norm": 0.14341667294502258, "learning_rate": 1e-06, "loss": -0.0158, "step": 558 }, { "clip_ratio/high_max": 0.002442295357468538, "clip_ratio/high_mean": 0.0010628783275024034, "clip_ratio/low_mean": 0.00077859660541435, "clip_ratio/low_min": 1.686909672571346e-05, "clip_ratio/region_mean": 0.0018414749356452376, "epoch": 5.737026239067055, "grad_norm": 0.1406722515821457, "learning_rate": 1e-06, "loss": -0.0626, "step": 559 }, { "clip_ratio/high_max": 0.002430164495308418, "clip_ratio/high_mean": 0.000937990637794428, "clip_ratio/low_mean": 0.0008982961553556379, "clip_ratio/low_min": 4.513308340392541e-05, "clip_ratio/region_mean": 0.001836286814068444, "epoch": 5.746355685131196, "grad_norm": 0.15397046506404877, "learning_rate": 1e-06, "loss": 0.043, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0355747767857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 679.5607299804688, "completions/mean_terminated_length": 553.5383911132812, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 5.755685131195335, "grad_norm": 0.15682891011238098, "learning_rate": 1e-06, "loss": 0.0199, "num_tokens": 335548376.0, "reward": 0.6443917751312256, "reward_std": 0.15579459071159363, "rewards/simpleverify_reward/mean": 0.6443917155265808, "rewards/simpleverify_reward/std": 0.47871389985084534, "step": 561 }, { "clip_ratio/high_max": 0.0016988049101200886, "clip_ratio/high_mean": 0.0007214375418698182, "clip_ratio/low_mean": 0.000430014332778228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001151451884652488, "epoch": 5.765014577259475, "grad_norm": 0.13908982276916504, "learning_rate": 1e-06, "loss": -0.0274, "step": 562 }, { "clip_ratio/high_max": 0.0017112171790358843, "clip_ratio/high_mean": 0.0007213231438072398, "clip_ratio/low_mean": 0.0004107928712073772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011321160600346047, "epoch": 5.774344023323615, "grad_norm": 0.15235336124897003, "learning_rate": 1e-06, "loss": -0.067, "step": 563 }, { "clip_ratio/high_max": 0.00204794426826993, "clip_ratio/high_mean": 0.0008029576474655187, "clip_ratio/low_mean": 0.000484627987134445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012875856555183418, "epoch": 5.783673469387755, "grad_norm": 0.14975757896900177, "learning_rate": 1e-06, "loss": -0.0124, "step": 564 }, { "clip_ratio/high_max": 0.0019292852593935095, "clip_ratio/high_mean": 0.0007316460350921261, "clip_ratio/low_mean": 0.0005480723439177382, "clip_ratio/low_min": 2.6167050236836076e-05, "clip_ratio/region_mean": 0.0012797183917427901, "epoch": 5.793002915451895, "grad_norm": 0.1502385437488556, "learning_rate": 1e-06, "loss": 0.0094, "step": 565 }, { "clip_ratio/high_max": 0.0017571360549482051, "clip_ratio/high_mean": 0.0007828166581020923, "clip_ratio/low_mean": 0.0004669049549193005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012497216266638134, "epoch": 5.802332361516035, "grad_norm": 0.1393752545118332, "learning_rate": 1e-06, "loss": -0.0376, "step": 566 }, { "clip_ratio/high_max": 0.0019516202592058107, "clip_ratio/high_mean": 0.0007546671040472575, "clip_ratio/low_mean": 0.0006012414587530657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013559085637098178, "epoch": 5.811661807580175, "grad_norm": 0.13896317780017853, "learning_rate": 1e-06, "loss": -0.0264, "step": 567 }, { "clip_ratio/high_max": 0.001807431697670836, "clip_ratio/high_mean": 0.0006318843552435283, "clip_ratio/low_mean": 0.0006801433501095744, "clip_ratio/low_min": 4.479191466089105e-05, "clip_ratio/region_mean": 0.0013120277144480497, "epoch": 5.820991253644315, "grad_norm": 0.12708379328250885, "learning_rate": 1e-06, "loss": 0.0403, "step": 568 }, { "clip_ratio/high_max": 0.0017469987033109646, "clip_ratio/high_mean": 0.000672867523462628, "clip_ratio/low_mean": 0.0005398716730269371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012127391637477558, "epoch": 5.830320699708455, "grad_norm": 0.18970701098442078, "learning_rate": 1e-06, "loss": -0.0398, "step": 569 }, { "clip_ratio/high_max": 0.0018862535398511682, "clip_ratio/high_mean": 0.0008155791583703831, "clip_ratio/low_mean": 0.0006581161687790882, "clip_ratio/low_min": 2.9385720154095907e-05, "clip_ratio/region_mean": 0.0014736953162355348, "epoch": 5.839650145772595, "grad_norm": 0.4579412341117859, "learning_rate": 1e-06, "loss": 0.0028, "step": 570 }, { "clip_ratio/high_max": 0.0016238074713328388, "clip_ratio/high_mean": 0.0006679712332697818, "clip_ratio/low_mean": 0.0005891383980269893, "clip_ratio/low_min": 2.597132697701454e-05, "clip_ratio/region_mean": 0.00125710961947334, "epoch": 5.848979591836734, "grad_norm": 0.13395725190639496, "learning_rate": 1e-06, "loss": -0.0409, "step": 571 }, { "clip_ratio/high_max": 0.0018767592191579752, "clip_ratio/high_mean": 0.0007306269926630193, "clip_ratio/low_mean": 0.000846367367557832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015769943420309573, "epoch": 5.858309037900875, "grad_norm": 0.15192414820194244, "learning_rate": 1e-06, "loss": 0.02, "step": 572 }, { "clip_ratio/high_max": 0.0019685166407725774, "clip_ratio/high_mean": 0.000775187496401486, "clip_ratio/low_mean": 0.0006368958472648956, "clip_ratio/low_min": 1.7775881133275107e-05, "clip_ratio/region_mean": 0.0014120833584456705, "epoch": 5.867638483965014, "grad_norm": 0.20702487230300903, "learning_rate": 1e-06, "loss": 0.0031, "step": 573 }, { "clip_ratio/high_max": 0.0019279746993561275, "clip_ratio/high_mean": 0.0007916132981335977, "clip_ratio/low_mean": 0.0006677431574644288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014593564883398358, "epoch": 5.876967930029155, "grad_norm": 0.15073294937610626, "learning_rate": 1e-06, "loss": -0.0071, "step": 574 }, { "clip_ratio/high_max": 0.002002699300646782, "clip_ratio/high_mean": 0.000865550922753755, "clip_ratio/low_mean": 0.0006973894578550244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015629403678758536, "epoch": 5.886297376093294, "grad_norm": 0.14705288410186768, "learning_rate": 1e-06, "loss": -0.0394, "step": 575 }, { "clip_ratio/high_max": 0.0021125098610355053, "clip_ratio/high_mean": 0.0008916950391721912, "clip_ratio/low_mean": 0.0008205929207178997, "clip_ratio/low_min": 3.521126927807927e-05, "clip_ratio/region_mean": 0.0017122879507951438, "epoch": 5.895626822157435, "grad_norm": 0.15603530406951904, "learning_rate": 1e-06, "loss": 0.004, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.039829799107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3737.0, "completions/mean_length": 694.5028076171875, "completions/mean_terminated_length": 553.4017944335938, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 6.0093294460641395, "grad_norm": 0.15491129457950592, "learning_rate": 1e-06, "loss": -0.0189, "num_tokens": 344597160.0, "reward": 0.63671875, "reward_std": 0.16129061579704285, "rewards/simpleverify_reward/mean": 0.63671875, "rewards/simpleverify_reward/std": 0.4809616506099701, "step": 577 }, { "clip_ratio/high_max": 0.001919586502481252, "clip_ratio/high_mean": 0.0006739636246493319, "clip_ratio/low_mean": 0.000524968713762064, "clip_ratio/low_min": 3.282629495515721e-05, "clip_ratio/region_mean": 0.0011989323647867423, "epoch": 6.01865889212828, "grad_norm": 0.18103481829166412, "learning_rate": 1e-06, "loss": 0.0293, "step": 578 }, { "clip_ratio/high_max": 0.0020036811674799537, "clip_ratio/high_mean": 0.0006938218311915989, "clip_ratio/low_mean": 0.0005617009901470738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001255522811334231, "epoch": 6.0279883381924195, "grad_norm": 0.16267411410808563, "learning_rate": 1e-06, "loss": -0.045, "step": 579 }, { "clip_ratio/high_max": 0.002008957770158304, "clip_ratio/high_mean": 0.0007997703833098058, "clip_ratio/low_mean": 0.0005702600919903489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013700304916710593, "epoch": 6.03731778425656, "grad_norm": 0.19187845289707184, "learning_rate": 1e-06, "loss": -0.0343, "step": 580 }, { "clip_ratio/high_max": 0.00181676175998291, "clip_ratio/high_mean": 0.0006759132629667874, "clip_ratio/low_mean": 0.0006069472929084441, "clip_ratio/low_min": 4.7561904466419946e-05, "clip_ratio/region_mean": 0.0012828605213144328, "epoch": 6.0466472303206995, "grad_norm": 0.1466693878173828, "learning_rate": 1e-06, "loss": -0.0096, "step": 581 }, { "clip_ratio/high_max": 0.0016278331095236354, "clip_ratio/high_mean": 0.0006537529388879193, "clip_ratio/low_mean": 0.0005507595742528792, "clip_ratio/low_min": 1.814750248740893e-05, "clip_ratio/region_mean": 0.0012045125258737244, "epoch": 6.05597667638484, "grad_norm": 0.16816377639770508, "learning_rate": 1e-06, "loss": -0.0278, "step": 582 }, { "clip_ratio/high_max": 0.0016104318492580205, "clip_ratio/high_mean": 0.0005920316889387323, "clip_ratio/low_mean": 0.0006300787481450243, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012221104370837566, "epoch": 6.0653061224489795, "grad_norm": 0.14041100442409515, "learning_rate": 1e-06, "loss": 0.0296, "step": 583 }, { "clip_ratio/high_max": 0.0014902109396643937, "clip_ratio/high_mean": 0.0006784888882975793, "clip_ratio/low_mean": 0.0007630390900885686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014415280129469465, "epoch": 6.07463556851312, "grad_norm": 0.3670876622200012, "learning_rate": 1e-06, "loss": 0.0132, "step": 584 }, { "clip_ratio/high_max": 0.0020207285051583312, "clip_ratio/high_mean": 0.0008041464188863756, "clip_ratio/low_mean": 0.000638652061979883, "clip_ratio/low_min": 1.4194866707839537e-05, "clip_ratio/region_mean": 0.0014427984606300015, "epoch": 6.0839650145772595, "grad_norm": 0.13824422657489777, "learning_rate": 1e-06, "loss": 0.0033, "step": 585 }, { "clip_ratio/high_max": 0.0018872332511818968, "clip_ratio/high_mean": 0.0007773780343995895, "clip_ratio/low_mean": 0.0006664134079983342, "clip_ratio/low_min": 1.9740997231565416e-05, "clip_ratio/region_mean": 0.0014437914651352912, "epoch": 6.093294460641399, "grad_norm": 0.14797744154930115, "learning_rate": 1e-06, "loss": 0.0066, "step": 586 }, { "clip_ratio/high_max": 0.0018471718576620333, "clip_ratio/high_mean": 0.0006855852643639082, "clip_ratio/low_mean": 0.0007698037170484895, "clip_ratio/low_min": 1.6456029698019847e-05, "clip_ratio/region_mean": 0.0014553889923263341, "epoch": 6.1026239067055394, "grad_norm": 0.14437782764434814, "learning_rate": 1e-06, "loss": 0.0266, "step": 587 }, { "clip_ratio/high_max": 0.001975101444259053, "clip_ratio/high_mean": 0.0008992478578875307, "clip_ratio/low_mean": 0.0007573171242256649, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001656564956647344, "epoch": 6.111953352769679, "grad_norm": 0.14998990297317505, "learning_rate": 1e-06, "loss": -0.0129, "step": 588 }, { "clip_ratio/high_max": 0.0021488690399564803, "clip_ratio/high_mean": 0.0007962211329868296, "clip_ratio/low_mean": 0.0006290404580795439, "clip_ratio/low_min": 1.613319545867853e-05, "clip_ratio/region_mean": 0.0014252615910663735, "epoch": 6.121282798833819, "grad_norm": 0.14553377032279968, "learning_rate": 1e-06, "loss": -0.0101, "step": 589 }, { "clip_ratio/high_max": 0.001997496401600074, "clip_ratio/high_mean": 0.0008074373290583026, "clip_ratio/low_mean": 0.0008485957023367519, "clip_ratio/low_min": 3.664701944217086e-05, "clip_ratio/region_mean": 0.0016560330259380862, "epoch": 6.130612244897959, "grad_norm": 0.14704501628875732, "learning_rate": 1e-06, "loss": -0.0111, "step": 590 }, { "clip_ratio/high_max": 0.002224771975306794, "clip_ratio/high_mean": 0.0009641050019126851, "clip_ratio/low_mean": 0.0007491799606214045, "clip_ratio/low_min": 1.2804753168893512e-05, "clip_ratio/region_mean": 0.001713284924335312, "epoch": 6.139941690962099, "grad_norm": 0.19491524994373322, "learning_rate": 1e-06, "loss": -0.0377, "step": 591 }, { "clip_ratio/high_max": 0.002120082004694268, "clip_ratio/high_mean": 0.0009592449332558317, "clip_ratio/low_mean": 0.0007993273102329113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001758572216203902, "epoch": 6.149271137026239, "grad_norm": 0.16303853690624237, "learning_rate": 1e-06, "loss": -0.0392, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04296875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 710.6819458007812, "completions/mean_terminated_length": 558.6880493164062, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 6.158600583090379, "grad_norm": 0.16690988838672638, "learning_rate": 1e-06, "loss": 0.0275, "num_tokens": 353700384.0, "reward": 0.6370675563812256, "reward_std": 0.15456557273864746, "rewards/simpleverify_reward/mean": 0.6370674967765808, "rewards/simpleverify_reward/std": 0.4808623790740967, "step": 593 }, { "clip_ratio/high_max": 0.001806603351724334, "clip_ratio/high_mean": 0.0006325614795059664, "clip_ratio/low_mean": 0.0005091435614303919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011417050409363583, "epoch": 6.167930029154519, "grad_norm": 0.15658491849899292, "learning_rate": 1e-06, "loss": -0.0273, "step": 594 }, { "clip_ratio/high_max": 0.0015686731949244859, "clip_ratio/high_mean": 0.0005682862156390911, "clip_ratio/low_mean": 0.0005099292991417315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010782155295601115, "epoch": 6.1772594752186585, "grad_norm": 0.13026687502861023, "learning_rate": 1e-06, "loss": -0.009, "step": 595 }, { "clip_ratio/high_max": 0.0016069627199613024, "clip_ratio/high_mean": 0.0006389846867023152, "clip_ratio/low_mean": 0.0005413385333667975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011803232409874909, "epoch": 6.186588921282799, "grad_norm": 0.17378823459148407, "learning_rate": 1e-06, "loss": 0.0177, "step": 596 }, { "clip_ratio/high_max": 0.0016198443117900752, "clip_ratio/high_mean": 0.0006331452023005113, "clip_ratio/low_mean": 0.0006067912727303337, "clip_ratio/low_min": 1.3168983969080728e-05, "clip_ratio/region_mean": 0.0012399364677548874, "epoch": 6.1959183673469385, "grad_norm": 0.14215828478336334, "learning_rate": 1e-06, "loss": 0.0035, "step": 597 }, { "clip_ratio/high_max": 0.0019641958315332886, "clip_ratio/high_mean": 0.0007148986105676158, "clip_ratio/low_mean": 0.0006308817301032832, "clip_ratio/low_min": 2.547130407037912e-05, "clip_ratio/region_mean": 0.0013457804161589593, "epoch": 6.205247813411079, "grad_norm": 0.22686822712421417, "learning_rate": 1e-06, "loss": 0.0133, "step": 598 }, { "clip_ratio/high_max": 0.0017920222817338072, "clip_ratio/high_mean": 0.0006678558338535368, "clip_ratio/low_mean": 0.0005647033876812202, "clip_ratio/low_min": 3.9423566704499535e-05, "clip_ratio/region_mean": 0.001232559239724651, "epoch": 6.214577259475218, "grad_norm": 0.14843516051769257, "learning_rate": 1e-06, "loss": 0.0162, "step": 599 }, { "clip_ratio/high_max": 0.0017919118981808424, "clip_ratio/high_mean": 0.0007036240840534447, "clip_ratio/low_mean": 0.000613041812357551, "clip_ratio/low_min": 1.4757969438505825e-05, "clip_ratio/region_mean": 0.001316665864578681, "epoch": 6.223906705539359, "grad_norm": 0.17748725414276123, "learning_rate": 1e-06, "loss": -0.0256, "step": 600 }, { "clip_ratio/high_max": 0.001972877318621613, "clip_ratio/high_mean": 0.000707440317455621, "clip_ratio/low_mean": 0.0007279314868355868, "clip_ratio/low_min": 4.226809960528044e-05, "clip_ratio/region_mean": 0.0014353718324855436, "epoch": 6.233236151603498, "grad_norm": 0.13498690724372864, "learning_rate": 1e-06, "loss": -0.0176, "step": 601 }, { "clip_ratio/high_max": 0.0017035210257745348, "clip_ratio/high_mean": 0.000643052394480037, "clip_ratio/low_mean": 0.000637551103864098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012806035192625131, "epoch": 6.242565597667639, "grad_norm": 0.13412533700466156, "learning_rate": 1e-06, "loss": 0.0038, "step": 602 }, { "clip_ratio/high_max": 0.002381499798502773, "clip_ratio/high_mean": 0.0008634504465589998, "clip_ratio/low_mean": 0.0006958754875086015, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001559325901325792, "epoch": 6.251895043731778, "grad_norm": 0.15132562816143036, "learning_rate": 1e-06, "loss": -0.0196, "step": 603 }, { "clip_ratio/high_max": 0.0020850693981628865, "clip_ratio/high_mean": 0.0007716840354987653, "clip_ratio/low_mean": 0.0008730713361728704, "clip_ratio/low_min": 5.777216028945986e-05, "clip_ratio/region_mean": 0.001644755404413445, "epoch": 6.261224489795918, "grad_norm": 0.16473767161369324, "learning_rate": 1e-06, "loss": 0.0466, "step": 604 }, { "clip_ratio/high_max": 0.0019757129630306736, "clip_ratio/high_mean": 0.0008200345018849475, "clip_ratio/low_mean": 0.0005902782113480498, "clip_ratio/low_min": 1.7241380191990174e-05, "clip_ratio/region_mean": 0.001410312714142492, "epoch": 6.270553935860058, "grad_norm": 0.1366012990474701, "learning_rate": 1e-06, "loss": -0.0651, "step": 605 }, { "clip_ratio/high_max": 0.002153994078980759, "clip_ratio/high_mean": 0.0009099323797272518, "clip_ratio/low_mean": 0.0007321627454075497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001642095114220865, "epoch": 6.279883381924198, "grad_norm": 0.14059123396873474, "learning_rate": 1e-06, "loss": -0.0614, "step": 606 }, { "clip_ratio/high_max": 0.002074650394206401, "clip_ratio/high_mean": 0.0008502578511979664, "clip_ratio/low_mean": 0.0007827364752301946, "clip_ratio/low_min": 1.2580515431181993e-05, "clip_ratio/region_mean": 0.0016329942918673623, "epoch": 6.289212827988338, "grad_norm": 0.16644296050071716, "learning_rate": 1e-06, "loss": -0.0522, "step": 607 }, { "clip_ratio/high_max": 0.0021648217589245178, "clip_ratio/high_mean": 0.0007852999151509721, "clip_ratio/low_mean": 0.0007414771116600605, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015267770359059796, "epoch": 6.298542274052478, "grad_norm": 0.1403196156024933, "learning_rate": 1e-06, "loss": -0.0273, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0479213169642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 738.6390991210938, "completions/mean_terminated_length": 569.6517944335938, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 6.307871720116618, "grad_norm": 0.15220703184604645, "learning_rate": 1e-06, "loss": -0.0213, "num_tokens": 362903074.0, "reward": 0.6363700032234192, "reward_std": 0.1691824346780777, "rewards/simpleverify_reward/mean": 0.6363700032234192, "rewards/simpleverify_reward/std": 0.4810606837272644, "step": 609 }, { "clip_ratio/high_max": 0.0018897771151387133, "clip_ratio/high_mean": 0.0007372880336333765, "clip_ratio/low_mean": 0.0004419397782839951, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011792278128268663, "epoch": 6.317201166180758, "grad_norm": 0.1590404361486435, "learning_rate": 1e-06, "loss": -0.0331, "step": 610 }, { "clip_ratio/high_max": 0.0019019450955966022, "clip_ratio/high_mean": 0.0007029517582850531, "clip_ratio/low_mean": 0.000493793074383575, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011967448226641864, "epoch": 6.326530612244898, "grad_norm": 0.143892303109169, "learning_rate": 1e-06, "loss": 0.016, "step": 611 }, { "clip_ratio/high_max": 0.0017615857577766292, "clip_ratio/high_mean": 0.0007341962109421729, "clip_ratio/low_mean": 0.0004801871828021831, "clip_ratio/low_min": 3.240020669181831e-05, "clip_ratio/region_mean": 0.0012143833737354726, "epoch": 6.335860058309038, "grad_norm": 0.15266813337802887, "learning_rate": 1e-06, "loss": -0.0341, "step": 612 }, { "clip_ratio/high_max": 0.0018520566954975948, "clip_ratio/high_mean": 0.0007149409902922343, "clip_ratio/low_mean": 0.0005197067712288117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012346478033578023, "epoch": 6.345189504373177, "grad_norm": 0.12829330563545227, "learning_rate": 1e-06, "loss": -0.0381, "step": 613 }, { "clip_ratio/high_max": 0.0017060288682841929, "clip_ratio/high_mean": 0.0006467259718192508, "clip_ratio/low_mean": 0.0006345520064314769, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001281278000533348, "epoch": 6.354518950437318, "grad_norm": 0.1478157937526703, "learning_rate": 1e-06, "loss": 0.0123, "step": 614 }, { "clip_ratio/high_max": 0.0018250000503030606, "clip_ratio/high_mean": 0.0006787888114558882, "clip_ratio/low_mean": 0.0006313187786872732, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001310107581957709, "epoch": 6.363848396501457, "grad_norm": 0.14290618896484375, "learning_rate": 1e-06, "loss": -0.0198, "step": 615 }, { "clip_ratio/high_max": 0.0015484656105400063, "clip_ratio/high_mean": 0.0006819610443926649, "clip_ratio/low_mean": 0.0007499772764276713, "clip_ratio/low_min": 8.567218355892692e-05, "clip_ratio/region_mean": 0.0014319383190013468, "epoch": 6.373177842565598, "grad_norm": 0.13824959099292755, "learning_rate": 1e-06, "loss": 0.0206, "step": 616 }, { "clip_ratio/high_max": 0.0019286041206214577, "clip_ratio/high_mean": 0.0008070317617239198, "clip_ratio/low_mean": 0.0005752796823799144, "clip_ratio/low_min": 1.1841606465168297e-05, "clip_ratio/region_mean": 0.0013823114240949508, "epoch": 6.382507288629737, "grad_norm": 0.15563172101974487, "learning_rate": 1e-06, "loss": -0.0428, "step": 617 }, { "clip_ratio/high_max": 0.0019265068040112965, "clip_ratio/high_mean": 0.0007322928104258608, "clip_ratio/low_mean": 0.0007196860015028506, "clip_ratio/low_min": 2.9185150197008625e-05, "clip_ratio/region_mean": 0.0014519788237521425, "epoch": 6.391836734693878, "grad_norm": 0.1465056985616684, "learning_rate": 1e-06, "loss": -0.0162, "step": 618 }, { "clip_ratio/high_max": 0.002417337669612607, "clip_ratio/high_mean": 0.0008951387117122067, "clip_ratio/low_mean": 0.0008836039978632471, "clip_ratio/low_min": 2.709733416850213e-05, "clip_ratio/region_mean": 0.0017787426768336445, "epoch": 6.401166180758017, "grad_norm": 0.15463121235370636, "learning_rate": 1e-06, "loss": 0.0023, "step": 619 }, { "clip_ratio/high_max": 0.0020132555146119557, "clip_ratio/high_mean": 0.0008814732209430076, "clip_ratio/low_mean": 0.0007743682981526945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016558415154577233, "epoch": 6.410495626822158, "grad_norm": 0.16060666739940643, "learning_rate": 1e-06, "loss": -0.049, "step": 620 }, { "clip_ratio/high_max": 0.002188598740758607, "clip_ratio/high_mean": 0.000913670959562296, "clip_ratio/low_mean": 0.0007812970407030662, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016949680320976768, "epoch": 6.419825072886297, "grad_norm": 0.16013433039188385, "learning_rate": 1e-06, "loss": 0.0018, "step": 621 }, { "clip_ratio/high_max": 0.002192209904023912, "clip_ratio/high_mean": 0.0008086710131465225, "clip_ratio/low_mean": 0.0007854901377868373, "clip_ratio/low_min": 4.151774010097142e-05, "clip_ratio/region_mean": 0.0015941611636662856, "epoch": 6.429154518950437, "grad_norm": 0.1425849348306656, "learning_rate": 1e-06, "loss": 0.0135, "step": 622 }, { "clip_ratio/high_max": 0.0024639868424856104, "clip_ratio/high_mean": 0.00099493134439399, "clip_ratio/low_mean": 0.0007792305023031076, "clip_ratio/low_min": 1.1877613360411488e-05, "clip_ratio/region_mean": 0.0017741618503350765, "epoch": 6.438483965014577, "grad_norm": 0.1665271520614624, "learning_rate": 1e-06, "loss": -0.0428, "step": 623 }, { "clip_ratio/high_max": 0.0023037885839585215, "clip_ratio/high_mean": 0.0009165822939394275, "clip_ratio/low_mean": 0.0008657633798065945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017823456873884425, "epoch": 6.447813411078717, "grad_norm": 0.15604479610919952, "learning_rate": 1e-06, "loss": -0.0218, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0484793526785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 731.228759765625, "completions/mean_terminated_length": 559.7958374023438, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 6.457142857142857, "grad_norm": 0.1645672470331192, "learning_rate": 1e-06, "loss": 0.0328, "num_tokens": 371944809.0, "reward": 0.647530734539032, "reward_std": 0.157597616314888, "rewards/simpleverify_reward/mean": 0.6475306749343872, "rewards/simpleverify_reward/std": 0.4777558147907257, "step": 625 }, { "clip_ratio/high_max": 0.00173076491773827, "clip_ratio/high_mean": 0.0006628720093431184, "clip_ratio/low_mean": 0.0004704133880295558, "clip_ratio/low_min": 2.484595461282879e-05, "clip_ratio/region_mean": 0.0011332854155625682, "epoch": 6.466472303206997, "grad_norm": 0.17037677764892578, "learning_rate": 1e-06, "loss": 0.0033, "step": 626 }, { "clip_ratio/high_max": 0.0015682143566664308, "clip_ratio/high_mean": 0.000574013609366375, "clip_ratio/low_mean": 0.00043744321146732545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010114568212884478, "epoch": 6.475801749271137, "grad_norm": 0.14114873111248016, "learning_rate": 1e-06, "loss": -0.0109, "step": 627 }, { "clip_ratio/high_max": 0.0019474342334433459, "clip_ratio/high_mean": 0.0007707926779403351, "clip_ratio/low_mean": 0.0004608268000083626, "clip_ratio/low_min": 1.5299878214136697e-05, "clip_ratio/region_mean": 0.001231619458849309, "epoch": 6.485131195335277, "grad_norm": 0.1628972887992859, "learning_rate": 1e-06, "loss": -0.029, "step": 628 }, { "clip_ratio/high_max": 0.0020590399435604922, "clip_ratio/high_mean": 0.0009157667300314642, "clip_ratio/low_mean": 0.0004433435224200366, "clip_ratio/low_min": 3.399510387680493e-05, "clip_ratio/region_mean": 0.0013591102506325115, "epoch": 6.494460641399417, "grad_norm": 0.16961784660816193, "learning_rate": 1e-06, "loss": -0.039, "step": 629 }, { "clip_ratio/high_max": 0.0015149171631492209, "clip_ratio/high_mean": 0.0005846458361702389, "clip_ratio/low_mean": 0.0005972566450509476, "clip_ratio/low_min": 1.3097233932057861e-05, "clip_ratio/region_mean": 0.0011819024803116918, "epoch": 6.503790087463557, "grad_norm": 0.13685010373592377, "learning_rate": 1e-06, "loss": 0.0115, "step": 630 }, { "clip_ratio/high_max": 0.0020636940316762775, "clip_ratio/high_mean": 0.0009494031728536356, "clip_ratio/low_mean": 0.000604048662353307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015534518424829002, "epoch": 6.513119533527696, "grad_norm": 0.16159525513648987, "learning_rate": 1e-06, "loss": -0.0287, "step": 631 }, { "clip_ratio/high_max": 0.0021665730309905484, "clip_ratio/high_mean": 0.0008609513351984788, "clip_ratio/low_mean": 0.0005674328613167745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014283841846918222, "epoch": 6.522448979591837, "grad_norm": 0.13494084775447845, "learning_rate": 1e-06, "loss": -0.0534, "step": 632 }, { "clip_ratio/high_max": 0.0021023013832746074, "clip_ratio/high_mean": 0.0008622819750598865, "clip_ratio/low_mean": 0.0006494074468719191, "clip_ratio/low_min": 1.9213033738196827e-05, "clip_ratio/region_mean": 0.001511689457402099, "epoch": 6.531778425655976, "grad_norm": 0.1242283508181572, "learning_rate": 1e-06, "loss": -0.0475, "step": 633 }, { "clip_ratio/high_max": 0.0018183599386247806, "clip_ratio/high_mean": 0.0006902606978655967, "clip_ratio/low_mean": 0.0005622839494208165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001252544669114286, "epoch": 6.541107871720117, "grad_norm": 0.14416494965553284, "learning_rate": 1e-06, "loss": 0.0282, "step": 634 }, { "clip_ratio/high_max": 0.0020478284059208818, "clip_ratio/high_mean": 0.0008566188316763146, "clip_ratio/low_mean": 0.0006022809393471107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001458899747376563, "epoch": 6.550437317784256, "grad_norm": 0.1463879495859146, "learning_rate": 1e-06, "loss": 0.0138, "step": 635 }, { "clip_ratio/high_max": 0.0020761941304954235, "clip_ratio/high_mean": 0.0008053589735936839, "clip_ratio/low_mean": 0.0007754978496450349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001580856853252044, "epoch": 6.559766763848397, "grad_norm": 0.14974966645240784, "learning_rate": 1e-06, "loss": -0.0142, "step": 636 }, { "clip_ratio/high_max": 0.002344689746678341, "clip_ratio/high_mean": 0.0008214875015255529, "clip_ratio/low_mean": 0.000584075722599664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014055632273084484, "epoch": 6.569096209912536, "grad_norm": 0.1632349193096161, "learning_rate": 1e-06, "loss": -0.0152, "step": 637 }, { "clip_ratio/high_max": 0.0018879763374570757, "clip_ratio/high_mean": 0.0009268940520996694, "clip_ratio/low_mean": 0.0007899954453023383, "clip_ratio/low_min": 8.449390588793904e-05, "clip_ratio/region_mean": 0.001716889462841209, "epoch": 6.578425655976677, "grad_norm": 0.1445135772228241, "learning_rate": 1e-06, "loss": -0.0512, "step": 638 }, { "clip_ratio/high_max": 0.0020735623329528607, "clip_ratio/high_mean": 0.0008213894834625535, "clip_ratio/low_mean": 0.0007986258269738755, "clip_ratio/low_min": 8.709812755114399e-05, "clip_ratio/region_mean": 0.0016200152967940085, "epoch": 6.587755102040816, "grad_norm": 0.15330392122268677, "learning_rate": 1e-06, "loss": -0.0236, "step": 639 }, { "clip_ratio/high_max": 0.0017329181318928022, "clip_ratio/high_mean": 0.0007332640107051702, "clip_ratio/low_mean": 0.0006853703762317309, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014186343869369011, "epoch": 6.597084548104956, "grad_norm": 0.14644472301006317, "learning_rate": 1e-06, "loss": 0.0019, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052734375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 743.1141967773438, "completions/mean_terminated_length": 556.4586791992188, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 6.606413994169096, "grad_norm": 0.1573868840932846, "learning_rate": 1e-06, "loss": 0.0116, "num_tokens": 380968790.0, "reward": 0.6321847438812256, "reward_std": 0.1572457104921341, "rewards/simpleverify_reward/mean": 0.6321846842765808, "rewards/simpleverify_reward/std": 0.4822275936603546, "step": 641 }, { "clip_ratio/high_max": 0.0018824848084477708, "clip_ratio/high_mean": 0.000720023217581911, "clip_ratio/low_mean": 0.00046002859562577214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011800518223026302, "epoch": 6.615743440233236, "grad_norm": 39.679874420166016, "learning_rate": 1e-06, "loss": -0.0189, "step": 642 }, { "clip_ratio/high_max": 0.0016747734298405703, "clip_ratio/high_mean": 0.0006909623789397301, "clip_ratio/low_mean": 0.0006153986396384425, "clip_ratio/low_min": 5.6728098570602015e-05, "clip_ratio/region_mean": 0.001306361020397162, "epoch": 6.625072886297376, "grad_norm": 0.1801864504814148, "learning_rate": 1e-06, "loss": -0.0097, "step": 643 }, { "clip_ratio/high_max": 0.0015970002314134035, "clip_ratio/high_mean": 0.0006614745252591092, "clip_ratio/low_mean": 0.0003825937474175589, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001044068294504541, "epoch": 6.634402332361516, "grad_norm": 0.1580941081047058, "learning_rate": 1e-06, "loss": -0.0445, "step": 644 }, { "clip_ratio/high_max": 0.002135661354259355, "clip_ratio/high_mean": 0.0007864030631026253, "clip_ratio/low_mean": 0.0005559220953728072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013423251402855385, "epoch": 6.643731778425656, "grad_norm": 0.16089344024658203, "learning_rate": 1e-06, "loss": -0.0745, "step": 645 }, { "clip_ratio/high_max": 0.001773468276951462, "clip_ratio/high_mean": 0.000633921607004595, "clip_ratio/low_mean": 0.000500211769121961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011341334065946285, "epoch": 6.653061224489796, "grad_norm": 0.1482229232788086, "learning_rate": 1e-06, "loss": -0.0024, "step": 646 }, { "clip_ratio/high_max": 0.0017554285004734993, "clip_ratio/high_mean": 0.0006850219242551248, "clip_ratio/low_mean": 0.0005835569791088346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012685788897215389, "epoch": 6.662390670553936, "grad_norm": 0.14896056056022644, "learning_rate": 1e-06, "loss": -0.0162, "step": 647 }, { "clip_ratio/high_max": 0.0017138592047558632, "clip_ratio/high_mean": 0.0006638596778429928, "clip_ratio/low_mean": 0.0006317424940789351, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012956021637364756, "epoch": 6.671720116618076, "grad_norm": 0.15372514724731445, "learning_rate": 1e-06, "loss": -0.0057, "step": 648 }, { "clip_ratio/high_max": 0.0023271992868103553, "clip_ratio/high_mean": 0.0009219995581588591, "clip_ratio/low_mean": 0.0007069722887536045, "clip_ratio/low_min": 9.144189243670553e-05, "clip_ratio/region_mean": 0.0016289718441839796, "epoch": 6.681049562682215, "grad_norm": 0.15298575162887573, "learning_rate": 1e-06, "loss": -0.032, "step": 649 }, { "clip_ratio/high_max": 0.002010010641242843, "clip_ratio/high_mean": 0.0008162479034581338, "clip_ratio/low_mean": 0.0006246867260415456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014409346149477642, "epoch": 6.690379008746356, "grad_norm": 0.17311494052410126, "learning_rate": 1e-06, "loss": -0.0281, "step": 650 }, { "clip_ratio/high_max": 0.0024296795745613053, "clip_ratio/high_mean": 0.0009443695489608217, "clip_ratio/low_mean": 0.0008802299525996204, "clip_ratio/low_min": 4.7970551349862944e-05, "clip_ratio/region_mean": 0.0018245995161123574, "epoch": 6.699708454810495, "grad_norm": 0.1855933666229248, "learning_rate": 1e-06, "loss": 0.0015, "step": 651 }, { "clip_ratio/high_max": 0.001893857497634599, "clip_ratio/high_mean": 0.000784049845606205, "clip_ratio/low_mean": 0.0007845249965612311, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015685747857787646, "epoch": 6.709037900874636, "grad_norm": 0.15073570609092712, "learning_rate": 1e-06, "loss": 0.0094, "step": 652 }, { "clip_ratio/high_max": 0.0019275867161923088, "clip_ratio/high_mean": 0.0008131190315907588, "clip_ratio/low_mean": 0.0006740352564520435, "clip_ratio/low_min": 3.162955545121804e-05, "clip_ratio/region_mean": 0.001487154313508654, "epoch": 6.718367346938775, "grad_norm": 0.1540910005569458, "learning_rate": 1e-06, "loss": -0.0117, "step": 653 }, { "clip_ratio/high_max": 0.0020199111313559115, "clip_ratio/high_mean": 0.0008346119175257627, "clip_ratio/low_mean": 0.0008207047440009774, "clip_ratio/low_min": 4.0683680708752945e-05, "clip_ratio/region_mean": 0.0016553166351513937, "epoch": 6.727696793002916, "grad_norm": 0.1424509733915329, "learning_rate": 1e-06, "loss": -0.0209, "step": 654 }, { "clip_ratio/high_max": 0.0020525084401015192, "clip_ratio/high_mean": 0.0009102451258513611, "clip_ratio/low_mean": 0.0008532408646715339, "clip_ratio/low_min": 3.5084008231933694e-05, "clip_ratio/region_mean": 0.0017634860196267255, "epoch": 6.737026239067055, "grad_norm": 0.15142090618610382, "learning_rate": 1e-06, "loss": -0.0237, "step": 655 }, { "clip_ratio/high_max": 0.0020697106010629795, "clip_ratio/high_mean": 0.0007416871430905303, "clip_ratio/low_mean": 0.00091507216870923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016567593265790492, "epoch": 6.746355685131196, "grad_norm": 0.1601790487766266, "learning_rate": 1e-06, "loss": 0.0184, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0487583705357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 734.9230346679688, "completions/mean_terminated_length": 562.6422119140625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 6.755685131195335, "grad_norm": 0.17796482145786285, "learning_rate": 1e-06, "loss": -0.0035, "num_tokens": 390078118.0, "reward": 0.634765625, "reward_std": 0.1525833010673523, "rewards/simpleverify_reward/mean": 0.634765625, "rewards/simpleverify_reward/std": 0.4815126061439514, "step": 657 }, { "clip_ratio/high_max": 0.002090992122248281, "clip_ratio/high_mean": 0.0007332539207709488, "clip_ratio/low_mean": 0.0004803767869816511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012136307123000734, "epoch": 6.765014577259475, "grad_norm": 0.13966546952724457, "learning_rate": 1e-06, "loss": -0.007, "step": 658 }, { "clip_ratio/high_max": 0.0019126246879750397, "clip_ratio/high_mean": 0.0006367667519953102, "clip_ratio/low_mean": 0.000571202043829544, "clip_ratio/low_min": 1.2065636838087812e-05, "clip_ratio/region_mean": 0.001207968802191317, "epoch": 6.774344023323615, "grad_norm": 0.15298497676849365, "learning_rate": 1e-06, "loss": -0.0454, "step": 659 }, { "clip_ratio/high_max": 0.0017587958936928771, "clip_ratio/high_mean": 0.0006659427217527991, "clip_ratio/low_mean": 0.00046680479863425717, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011327475294820033, "epoch": 6.783673469387755, "grad_norm": 0.1550005078315735, "learning_rate": 1e-06, "loss": -0.0112, "step": 660 }, { "clip_ratio/high_max": 0.0017420007825421635, "clip_ratio/high_mean": 0.0007176783037721179, "clip_ratio/low_mean": 0.0004025882417408866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011202665373275522, "epoch": 6.793002915451895, "grad_norm": 0.13431932032108307, "learning_rate": 1e-06, "loss": -0.0372, "step": 661 }, { "clip_ratio/high_max": 0.0017036305616784375, "clip_ratio/high_mean": 0.0007234342810988892, "clip_ratio/low_mean": 0.0005451314830224874, "clip_ratio/low_min": 1.1314265066175722e-05, "clip_ratio/region_mean": 0.0012685657748079393, "epoch": 6.802332361516035, "grad_norm": 0.1333310902118683, "learning_rate": 1e-06, "loss": -0.0105, "step": 662 }, { "clip_ratio/high_max": 0.0019924982407246716, "clip_ratio/high_mean": 0.0007861541198508348, "clip_ratio/low_mean": 0.0006615308629989158, "clip_ratio/low_min": 1.7740561816026457e-05, "clip_ratio/region_mean": 0.001447684993763687, "epoch": 6.811661807580175, "grad_norm": 0.15378892421722412, "learning_rate": 1e-06, "loss": -0.0181, "step": 663 }, { "clip_ratio/high_max": 0.00190073145495262, "clip_ratio/high_mean": 0.0007955122609928367, "clip_ratio/low_mean": 0.0005292729647408123, "clip_ratio/low_min": 1.163440083473688e-05, "clip_ratio/region_mean": 0.0013247852220956702, "epoch": 6.820991253644315, "grad_norm": 0.1422271877527237, "learning_rate": 1e-06, "loss": -0.0652, "step": 664 }, { "clip_ratio/high_max": 0.001712131761451019, "clip_ratio/high_mean": 0.0006964726489968598, "clip_ratio/low_mean": 0.0007800204857630888, "clip_ratio/low_min": 3.109452882199548e-05, "clip_ratio/region_mean": 0.0014764931329409592, "epoch": 6.830320699708455, "grad_norm": 0.16426940262317657, "learning_rate": 1e-06, "loss": 0.0148, "step": 665 }, { "clip_ratio/high_max": 0.0017310102521150839, "clip_ratio/high_mean": 0.0007534322676292504, "clip_ratio/low_mean": 0.0007850020137993852, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015384343278128654, "epoch": 6.839650145772595, "grad_norm": 0.16266119480133057, "learning_rate": 1e-06, "loss": 0.0105, "step": 666 }, { "clip_ratio/high_max": 0.0017234196893696208, "clip_ratio/high_mean": 0.0007541752429460757, "clip_ratio/low_mean": 0.0005774379424110521, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013316131553438026, "epoch": 6.848979591836734, "grad_norm": 0.14158910512924194, "learning_rate": 1e-06, "loss": -0.0315, "step": 667 }, { "clip_ratio/high_max": 0.0018562687691883184, "clip_ratio/high_mean": 0.00071352055783791, "clip_ratio/low_mean": 0.0005719044293073239, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012854249944211915, "epoch": 6.858309037900875, "grad_norm": 0.13960792124271393, "learning_rate": 1e-06, "loss": -0.0343, "step": 668 }, { "clip_ratio/high_max": 0.0020985101291444153, "clip_ratio/high_mean": 0.0007480308158847038, "clip_ratio/low_mean": 0.0006580039125765325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014060347239137627, "epoch": 6.867638483965014, "grad_norm": 0.14887776970863342, "learning_rate": 1e-06, "loss": 0.0089, "step": 669 }, { "clip_ratio/high_max": 0.0020139322077739052, "clip_ratio/high_mean": 0.0008427332031715196, "clip_ratio/low_mean": 0.0006062044558348134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014489375862467568, "epoch": 6.876967930029155, "grad_norm": 0.1348954737186432, "learning_rate": 1e-06, "loss": 0.0031, "step": 670 }, { "clip_ratio/high_max": 0.002360861559282057, "clip_ratio/high_mean": 0.0008313103680848144, "clip_ratio/low_mean": 0.0007008062002569204, "clip_ratio/low_min": 1.617494854144752e-05, "clip_ratio/region_mean": 0.0015321165410568938, "epoch": 6.886297376093294, "grad_norm": 0.15998440980911255, "learning_rate": 1e-06, "loss": -0.0439, "step": 671 }, { "clip_ratio/high_max": 0.002043867621978279, "clip_ratio/high_mean": 0.0008191325923689874, "clip_ratio/low_mean": 0.000783366109317285, "clip_ratio/low_min": 3.5310735256643966e-05, "clip_ratio/region_mean": 0.0016024986980482936, "epoch": 6.895626822157435, "grad_norm": 0.1343689113855362, "learning_rate": 1e-06, "loss": -0.0042, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 747.8745727539062, "completions/mean_terminated_length": 558.3579711914062, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 7.0093294460641395, "grad_norm": 0.1621917486190796, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 399094463.0, "reward": 0.6410435438156128, "reward_std": 0.1536213606595993, "rewards/simpleverify_reward/mean": 0.6410435438156128, "rewards/simpleverify_reward/std": 0.4797111451625824, "step": 673 }, { "clip_ratio/high_max": 0.001539603701530723, "clip_ratio/high_mean": 0.0006097130990383448, "clip_ratio/low_mean": 0.0005757337762588577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011854468793899287, "epoch": 7.01865889212828, "grad_norm": 0.1572120189666748, "learning_rate": 1e-06, "loss": -0.0048, "step": 674 }, { "clip_ratio/high_max": 0.0019126074257656, "clip_ratio/high_mean": 0.0007311776153073879, "clip_ratio/low_mean": 0.0005582010908256052, "clip_ratio/low_min": 1.526624328107573e-05, "clip_ratio/region_mean": 0.0012893786952190567, "epoch": 7.0279883381924195, "grad_norm": 0.1481982171535492, "learning_rate": 1e-06, "loss": 0.0063, "step": 675 }, { "clip_ratio/high_max": 0.002272270001412835, "clip_ratio/high_mean": 0.0007995716150617227, "clip_ratio/low_mean": 0.0005442440869956044, "clip_ratio/low_min": 1.310822153754998e-05, "clip_ratio/region_mean": 0.0013438156820484437, "epoch": 7.03731778425656, "grad_norm": 0.16868920624256134, "learning_rate": 1e-06, "loss": -0.0578, "step": 676 }, { "clip_ratio/high_max": 0.0016890315928321797, "clip_ratio/high_mean": 0.0006223497075552586, "clip_ratio/low_mean": 0.0006483062006736873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00127065589549602, "epoch": 7.0466472303206995, "grad_norm": 0.1477431207895279, "learning_rate": 1e-06, "loss": 0.0282, "step": 677 }, { "clip_ratio/high_max": 0.001798963810870191, "clip_ratio/high_mean": 0.0006255647836042044, "clip_ratio/low_mean": 0.0005276437805150636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011532085591170471, "epoch": 7.05597667638484, "grad_norm": 0.151478111743927, "learning_rate": 1e-06, "loss": -0.028, "step": 678 }, { "clip_ratio/high_max": 0.0020074108833796345, "clip_ratio/high_mean": 0.0007035363450995646, "clip_ratio/low_mean": 0.0005213109484429879, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012248473030922469, "epoch": 7.0653061224489795, "grad_norm": 0.16450725495815277, "learning_rate": 1e-06, "loss": -0.0279, "step": 679 }, { "clip_ratio/high_max": 0.001810269386623986, "clip_ratio/high_mean": 0.0007494703013435355, "clip_ratio/low_mean": 0.0005827561144542415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013322264421731234, "epoch": 7.07463556851312, "grad_norm": 0.15337780117988586, "learning_rate": 1e-06, "loss": -0.0098, "step": 680 }, { "clip_ratio/high_max": 0.001803622184525011, "clip_ratio/high_mean": 0.0006478087379946373, "clip_ratio/low_mean": 0.000855501641126466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015033103918540291, "epoch": 7.0839650145772595, "grad_norm": 0.16000093519687653, "learning_rate": 1e-06, "loss": -0.0007, "step": 681 }, { "clip_ratio/high_max": 0.002251836904179072, "clip_ratio/high_mean": 0.0007387350224234979, "clip_ratio/low_mean": 0.0007284641551450477, "clip_ratio/low_min": 2.351981856918428e-05, "clip_ratio/region_mean": 0.0014671991593786515, "epoch": 7.093294460641399, "grad_norm": 0.1345730423927307, "learning_rate": 1e-06, "loss": -0.0142, "step": 682 }, { "clip_ratio/high_max": 0.002168652012187522, "clip_ratio/high_mean": 0.0007856204028939828, "clip_ratio/low_mean": 0.0007344220248342026, "clip_ratio/low_min": 2.28268818318611e-05, "clip_ratio/region_mean": 0.001520042435004143, "epoch": 7.1026239067055394, "grad_norm": 0.1596435010433197, "learning_rate": 1e-06, "loss": 0.0117, "step": 683 }, { "clip_ratio/high_max": 0.0021119923585501965, "clip_ratio/high_mean": 0.0008231516003434081, "clip_ratio/low_mean": 0.000633661509709782, "clip_ratio/low_min": 5.613436223939061e-05, "clip_ratio/region_mean": 0.001456813119148137, "epoch": 7.111953352769679, "grad_norm": 0.14788201451301575, "learning_rate": 1e-06, "loss": -0.0639, "step": 684 }, { "clip_ratio/high_max": 0.002531781268771738, "clip_ratio/high_mean": 0.0009383931901538745, "clip_ratio/low_mean": 0.0007571269507025136, "clip_ratio/low_min": 2.51004021265544e-05, "clip_ratio/region_mean": 0.0016955201426753774, "epoch": 7.121282798833819, "grad_norm": 0.1539023071527481, "learning_rate": 1e-06, "loss": -0.056, "step": 685 }, { "clip_ratio/high_max": 0.0021008096446166746, "clip_ratio/high_mean": 0.0007846729240554851, "clip_ratio/low_mean": 0.0009674267021182459, "clip_ratio/low_min": 6.943963762751082e-05, "clip_ratio/region_mean": 0.0017520996552775614, "epoch": 7.130612244897959, "grad_norm": 0.1636466085910797, "learning_rate": 1e-06, "loss": 0.0088, "step": 686 }, { "clip_ratio/high_max": 0.0021834333238075487, "clip_ratio/high_mean": 0.0007792117539793253, "clip_ratio/low_mean": 0.000741887059120927, "clip_ratio/low_min": 9.504257832304575e-06, "clip_ratio/region_mean": 0.0015210988458420616, "epoch": 7.139941690962099, "grad_norm": 0.12650802731513977, "learning_rate": 1e-06, "loss": -0.0424, "step": 687 }, { "clip_ratio/high_max": 0.0022351375555444974, "clip_ratio/high_mean": 0.0007857369746488985, "clip_ratio/low_mean": 0.0009921559667418478, "clip_ratio/low_min": 4.7187990276142955e-05, "clip_ratio/region_mean": 0.0017778929286578204, "epoch": 7.149271137026239, "grad_norm": 0.1392034888267517, "learning_rate": 1e-06, "loss": -0.0214, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052525111607142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 741.730712890625, "completions/mean_terminated_length": 555.7802124023438, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 7.158600583090379, "grad_norm": 0.15640226006507874, "learning_rate": 1e-06, "loss": -0.0456, "num_tokens": 408060194.0, "reward": 0.642159640789032, "reward_std": 0.15820829570293427, "rewards/simpleverify_reward/mean": 0.6421595811843872, "rewards/simpleverify_reward/std": 0.4793815314769745, "step": 689 }, { "clip_ratio/high_max": 0.00163877672093804, "clip_ratio/high_mean": 0.0006754090936738066, "clip_ratio/low_mean": 0.00047572618632329977, "clip_ratio/low_min": 3.831222056760453e-05, "clip_ratio/region_mean": 0.0011511352931847796, "epoch": 7.167930029154519, "grad_norm": 0.15836471319198608, "learning_rate": 1e-06, "loss": -0.0073, "step": 690 }, { "clip_ratio/high_max": 0.00203203925411799, "clip_ratio/high_mean": 0.000759183254558593, "clip_ratio/low_mean": 0.0005063432131464651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012655264690693002, "epoch": 7.1772594752186585, "grad_norm": 0.23170249164104462, "learning_rate": 1e-06, "loss": -0.0211, "step": 691 }, { "clip_ratio/high_max": 0.002047374000540003, "clip_ratio/high_mean": 0.0008106197528832126, "clip_ratio/low_mean": 0.0005158672174729872, "clip_ratio/low_min": 1.5013211850600783e-05, "clip_ratio/region_mean": 0.0013264869558042847, "epoch": 7.186588921282799, "grad_norm": 0.19392050802707672, "learning_rate": 1e-06, "loss": -0.0379, "step": 692 }, { "clip_ratio/high_max": 0.0018406469498586375, "clip_ratio/high_mean": 0.0007606118233525194, "clip_ratio/low_mean": 0.0006191339889483061, "clip_ratio/low_min": 1.5147842532314826e-05, "clip_ratio/region_mean": 0.001379745841404656, "epoch": 7.1959183673469385, "grad_norm": 0.20466382801532745, "learning_rate": 1e-06, "loss": -0.0017, "step": 693 }, { "clip_ratio/high_max": 0.0020437474922800902, "clip_ratio/high_mean": 0.0007549766705778893, "clip_ratio/low_mean": 0.0004432261634974566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011982028590864502, "epoch": 7.205247813411079, "grad_norm": 0.1611458957195282, "learning_rate": 1e-06, "loss": -0.028, "step": 694 }, { "clip_ratio/high_max": 0.0022115908286650665, "clip_ratio/high_mean": 0.0008583474646002287, "clip_ratio/low_mean": 0.0005979473035040428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014562947762897238, "epoch": 7.214577259475218, "grad_norm": 0.16115182638168335, "learning_rate": 1e-06, "loss": -0.0469, "step": 695 }, { "clip_ratio/high_max": 0.001804371495381929, "clip_ratio/high_mean": 0.0006733804584655445, "clip_ratio/low_mean": 0.0005566821437241742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012300626040087081, "epoch": 7.223906705539359, "grad_norm": 0.15548433363437653, "learning_rate": 1e-06, "loss": 0.0076, "step": 696 }, { "clip_ratio/high_max": 0.0019128283674945123, "clip_ratio/high_mean": 0.0007582830767205451, "clip_ratio/low_mean": 0.0005653898497257615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001323672906437423, "epoch": 7.233236151603498, "grad_norm": 0.14589658379554749, "learning_rate": 1e-06, "loss": -0.0432, "step": 697 }, { "clip_ratio/high_max": 0.0019868299932568334, "clip_ratio/high_mean": 0.0008241607747550006, "clip_ratio/low_mean": 0.0006251850409171311, "clip_ratio/low_min": 1.6250649423454888e-05, "clip_ratio/region_mean": 0.0014493457856588066, "epoch": 7.242565597667639, "grad_norm": 0.14956364035606384, "learning_rate": 1e-06, "loss": -0.0161, "step": 698 }, { "clip_ratio/high_max": 0.0022927711688680574, "clip_ratio/high_mean": 0.0009091871543205343, "clip_ratio/low_mean": 0.0006104647954998654, "clip_ratio/low_min": 1.4592575098504312e-05, "clip_ratio/region_mean": 0.001519651948910905, "epoch": 7.251895043731778, "grad_norm": 0.16980783641338348, "learning_rate": 1e-06, "loss": -0.0208, "step": 699 }, { "clip_ratio/high_max": 0.0021122362159076147, "clip_ratio/high_mean": 0.0008357551851077005, "clip_ratio/low_mean": 0.0007476556793335476, "clip_ratio/low_min": 2.8480291803134605e-05, "clip_ratio/region_mean": 0.0015834108817216475, "epoch": 7.261224489795918, "grad_norm": 0.15545222163200378, "learning_rate": 1e-06, "loss": -0.005, "step": 700 }, { "clip_ratio/high_max": 0.002355021395487711, "clip_ratio/high_mean": 0.000897260788406129, "clip_ratio/low_mean": 0.0007661175059183734, "clip_ratio/low_min": 3.7702613553847186e-05, "clip_ratio/region_mean": 0.0016633783307042904, "epoch": 7.270553935860058, "grad_norm": 0.15768355131149292, "learning_rate": 1e-06, "loss": -0.0209, "step": 701 }, { "clip_ratio/high_max": 0.0021650454509654082, "clip_ratio/high_mean": 0.0008409084130107658, "clip_ratio/low_mean": 0.0008839494294079486, "clip_ratio/low_min": 4.6743463826715015e-05, "clip_ratio/region_mean": 0.0017248578442377038, "epoch": 7.279883381924198, "grad_norm": 0.16740065813064575, "learning_rate": 1e-06, "loss": -0.0066, "step": 702 }, { "clip_ratio/high_max": 0.002225271033239551, "clip_ratio/high_mean": 0.0008802727188594872, "clip_ratio/low_mean": 0.0007405721439681656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016208448505494744, "epoch": 7.289212827988338, "grad_norm": 0.16897745430469513, "learning_rate": 1e-06, "loss": -0.0445, "step": 703 }, { "clip_ratio/high_max": 0.0018601813062559813, "clip_ratio/high_mean": 0.0007885113009251654, "clip_ratio/low_mean": 0.0007888553745942772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015773666964378208, "epoch": 7.298542274052478, "grad_norm": 0.14527839422225952, "learning_rate": 1e-06, "loss": -0.0063, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05126953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 735.4595336914062, "completions/mean_terminated_length": 553.8553466796875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 7.307871720116618, "grad_norm": 0.18107275664806366, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 417015133.0, "reward": 0.648995578289032, "reward_std": 0.14964734017848969, "rewards/simpleverify_reward/mean": 0.6489955186843872, "rewards/simpleverify_reward/std": 0.4773009717464447, "step": 705 }, { "clip_ratio/high_max": 0.0015317681281885598, "clip_ratio/high_mean": 0.0005316663900885032, "clip_ratio/low_mean": 0.00043497728074726183, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009666436599218287, "epoch": 7.317201166180758, "grad_norm": 0.18445536494255066, "learning_rate": 1e-06, "loss": 0.0152, "step": 706 }, { "clip_ratio/high_max": 0.0019466772355372086, "clip_ratio/high_mean": 0.0007205711863207398, "clip_ratio/low_mean": 0.00043095207365695387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011515232545207255, "epoch": 7.326530612244898, "grad_norm": 0.146745502948761, "learning_rate": 1e-06, "loss": -0.0282, "step": 707 }, { "clip_ratio/high_max": 0.0017631504560995381, "clip_ratio/high_mean": 0.0006659015707555227, "clip_ratio/low_mean": 0.0005264603551040636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001192361905850703, "epoch": 7.335860058309038, "grad_norm": 0.13995243608951569, "learning_rate": 1e-06, "loss": -0.0217, "step": 708 }, { "clip_ratio/high_max": 0.001944116324011702, "clip_ratio/high_mean": 0.0008198856357921613, "clip_ratio/low_mean": 0.0004218016820232151, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012416872959875036, "epoch": 7.345189504373177, "grad_norm": 0.2748670279979706, "learning_rate": 1e-06, "loss": -0.045, "step": 709 }, { "clip_ratio/high_max": 0.001631357106816722, "clip_ratio/high_mean": 0.0006500547115138033, "clip_ratio/low_mean": 0.0005407509261203813, "clip_ratio/low_min": 5.6887136452132836e-05, "clip_ratio/region_mean": 0.001190805618534796, "epoch": 7.354518950437318, "grad_norm": 0.1622065305709839, "learning_rate": 1e-06, "loss": -0.0271, "step": 710 }, { "clip_ratio/high_max": 0.001746570003888337, "clip_ratio/high_mean": 0.0006207927126524737, "clip_ratio/low_mean": 0.0005834534376845113, "clip_ratio/low_min": 3.5072953323833644e-05, "clip_ratio/region_mean": 0.0012042461312375963, "epoch": 7.363848396501457, "grad_norm": 0.1480531543493271, "learning_rate": 1e-06, "loss": 0.0189, "step": 711 }, { "clip_ratio/high_max": 0.0020705725000880193, "clip_ratio/high_mean": 0.0007706549640715821, "clip_ratio/low_mean": 0.0005705160092475126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013411709696811158, "epoch": 7.373177842565598, "grad_norm": 0.14939482510089874, "learning_rate": 1e-06, "loss": -0.0318, "step": 712 }, { "clip_ratio/high_max": 0.0019072955910814926, "clip_ratio/high_mean": 0.0006847083750471938, "clip_ratio/low_mean": 0.0005688673174972791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012535756613942795, "epoch": 7.382507288629737, "grad_norm": 0.14344856142997742, "learning_rate": 1e-06, "loss": -0.009, "step": 713 }, { "clip_ratio/high_max": 0.0018861067146644928, "clip_ratio/high_mean": 0.000744418137401226, "clip_ratio/low_mean": 0.0005407584185377345, "clip_ratio/low_min": 1.4688601368106902e-05, "clip_ratio/region_mean": 0.0012851765386585612, "epoch": 7.391836734693878, "grad_norm": 0.13547088205814362, "learning_rate": 1e-06, "loss": -0.0259, "step": 714 }, { "clip_ratio/high_max": 0.0021554495542659424, "clip_ratio/high_mean": 0.0007934052791824797, "clip_ratio/low_mean": 0.0007400218028124073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015334270865423605, "epoch": 7.401166180758017, "grad_norm": 0.17248861491680145, "learning_rate": 1e-06, "loss": 0.0006, "step": 715 }, { "clip_ratio/high_max": 0.0022074430744396523, "clip_ratio/high_mean": 0.0009041889679792803, "clip_ratio/low_mean": 0.0006711603382427711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015753492843941785, "epoch": 7.410495626822158, "grad_norm": 0.18279635906219482, "learning_rate": 1e-06, "loss": -0.0372, "step": 716 }, { "clip_ratio/high_max": 0.0017698980554996524, "clip_ratio/high_mean": 0.0007066643265716266, "clip_ratio/low_mean": 0.0008270556390925776, "clip_ratio/low_min": 1.385194991598837e-05, "clip_ratio/region_mean": 0.001533719969302183, "epoch": 7.419825072886297, "grad_norm": 0.14719562232494354, "learning_rate": 1e-06, "loss": 0.0265, "step": 717 }, { "clip_ratio/high_max": 0.002490229810064193, "clip_ratio/high_mean": 0.0009357395138067659, "clip_ratio/low_mean": 0.0006826507324149134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016183902189368382, "epoch": 7.429154518950437, "grad_norm": 0.17594051361083984, "learning_rate": 1e-06, "loss": -0.0511, "step": 718 }, { "clip_ratio/high_max": 0.0021290337135724258, "clip_ratio/high_mean": 0.0007835490341676632, "clip_ratio/low_mean": 0.0007326543473027414, "clip_ratio/low_min": 5.089576370664872e-05, "clip_ratio/region_mean": 0.0015162034251261503, "epoch": 7.438483965014577, "grad_norm": 0.14147989451885223, "learning_rate": 1e-06, "loss": -0.0122, "step": 719 }, { "clip_ratio/high_max": 0.0021233216029941104, "clip_ratio/high_mean": 0.0008758742515055928, "clip_ratio/low_mean": 0.0008478984254907118, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017237726824532729, "epoch": 7.447813411078717, "grad_norm": 0.17874298989772797, "learning_rate": 1e-06, "loss": -0.0075, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0581752232142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 779.3267822265625, "completions/mean_terminated_length": 574.4603881835938, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 7.457142857142857, "grad_norm": 0.1868254691362381, "learning_rate": 1e-06, "loss": -0.0274, "num_tokens": 426192065.0, "reward": 0.6388811469078064, "reward_std": 0.1531626582145691, "rewards/simpleverify_reward/mean": 0.6388811469078064, "rewards/simpleverify_reward/std": 0.48034167289733887, "step": 721 }, { "clip_ratio/high_max": 0.0018730557567323558, "clip_ratio/high_mean": 0.0006359995441016508, "clip_ratio/low_mean": 0.00039736558778713515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001033365129842423, "epoch": 7.466472303206997, "grad_norm": 0.1781143695116043, "learning_rate": 1e-06, "loss": -0.0448, "step": 722 }, { "clip_ratio/high_max": 0.0014458152691076975, "clip_ratio/high_mean": 0.0005790389841422439, "clip_ratio/low_mean": 0.0004890119255378522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010680509021767648, "epoch": 7.475801749271137, "grad_norm": 0.1618252843618393, "learning_rate": 1e-06, "loss": -0.031, "step": 723 }, { "clip_ratio/high_max": 0.0017352153699903283, "clip_ratio/high_mean": 0.0006737239145877538, "clip_ratio/low_mean": 0.0005270035808280227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012007274963252712, "epoch": 7.485131195335277, "grad_norm": 0.14613813161849976, "learning_rate": 1e-06, "loss": -0.0408, "step": 724 }, { "clip_ratio/high_max": 0.0018066938828269485, "clip_ratio/high_mean": 0.000625143327852129, "clip_ratio/low_mean": 0.0006150977224024246, "clip_ratio/low_min": 1.3278096048452426e-05, "clip_ratio/region_mean": 0.0012402410502545536, "epoch": 7.494460641399417, "grad_norm": 0.15042772889137268, "learning_rate": 1e-06, "loss": 0.0167, "step": 725 }, { "clip_ratio/high_max": 0.0015640314668416977, "clip_ratio/high_mean": 0.0005529885947908042, "clip_ratio/low_mean": 0.0005726306726501207, "clip_ratio/low_min": 2.696871706575621e-05, "clip_ratio/region_mean": 0.0011256192829023348, "epoch": 7.503790087463557, "grad_norm": 0.15938085317611694, "learning_rate": 1e-06, "loss": 0.015, "step": 726 }, { "clip_ratio/high_max": 0.0018822420970536768, "clip_ratio/high_mean": 0.0006255292510104482, "clip_ratio/low_mean": 0.0005923046528550913, "clip_ratio/low_min": 9.226454494637437e-06, "clip_ratio/region_mean": 0.0012178339165984653, "epoch": 7.513119533527696, "grad_norm": 0.1639842391014099, "learning_rate": 1e-06, "loss": -0.01, "step": 727 }, { "clip_ratio/high_max": 0.0020747629532706924, "clip_ratio/high_mean": 0.0007406369022646686, "clip_ratio/low_mean": 0.0006105275251684361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013511644319805782, "epoch": 7.522448979591837, "grad_norm": 0.13223564624786377, "learning_rate": 1e-06, "loss": -0.0101, "step": 728 }, { "clip_ratio/high_max": 0.0020152615106781013, "clip_ratio/high_mean": 0.0007753410154691665, "clip_ratio/low_mean": 0.0006525476828755927, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014278887247201055, "epoch": 7.531778425655976, "grad_norm": 0.16785383224487305, "learning_rate": 1e-06, "loss": -0.0259, "step": 729 }, { "clip_ratio/high_max": 0.0014869000478938688, "clip_ratio/high_mean": 0.0006088391237426549, "clip_ratio/low_mean": 0.0007812808034941554, "clip_ratio/low_min": 3.742384160432266e-05, "clip_ratio/region_mean": 0.0013901199308747891, "epoch": 7.541107871720117, "grad_norm": 0.14914339780807495, "learning_rate": 1e-06, "loss": 0.0107, "step": 730 }, { "clip_ratio/high_max": 0.0023131000489229336, "clip_ratio/high_mean": 0.0008890741200957564, "clip_ratio/low_mean": 0.0006050412339391187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014941153494874015, "epoch": 7.550437317784256, "grad_norm": 0.15665189921855927, "learning_rate": 1e-06, "loss": -0.0662, "step": 731 }, { "clip_ratio/high_max": 0.0018947239987028297, "clip_ratio/high_mean": 0.0007957971120049478, "clip_ratio/low_mean": 0.0009013649814733071, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016971620607364457, "epoch": 7.559766763848397, "grad_norm": 0.15194910764694214, "learning_rate": 1e-06, "loss": -0.0256, "step": 732 }, { "clip_ratio/high_max": 0.0018961176720040385, "clip_ratio/high_mean": 0.000738256196200382, "clip_ratio/low_mean": 0.0007101808769220952, "clip_ratio/low_min": 2.9953746889077593e-05, "clip_ratio/region_mean": 0.0014484371058642864, "epoch": 7.569096209912536, "grad_norm": 0.16773250699043274, "learning_rate": 1e-06, "loss": -0.0049, "step": 733 }, { "clip_ratio/high_max": 0.0021126670762896538, "clip_ratio/high_mean": 0.0009440130852453876, "clip_ratio/low_mean": 0.0006935225255801925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016375356208300218, "epoch": 7.578425655976677, "grad_norm": 0.13990573585033417, "learning_rate": 1e-06, "loss": -0.0573, "step": 734 }, { "clip_ratio/high_max": 0.0017758709218469448, "clip_ratio/high_mean": 0.0007121807575458661, "clip_ratio/low_mean": 0.0006950149891054025, "clip_ratio/low_min": 2.7937028789892793e-05, "clip_ratio/region_mean": 0.0014071957630221732, "epoch": 7.587755102040816, "grad_norm": 0.14114423096179962, "learning_rate": 1e-06, "loss": -0.015, "step": 735 }, { "clip_ratio/high_max": 0.0020793210496776737, "clip_ratio/high_mean": 0.0008554111791454488, "clip_ratio/low_mean": 0.0008939422295952681, "clip_ratio/low_min": 3.285151251475327e-05, "clip_ratio/region_mean": 0.0017493534105597064, "epoch": 7.597084548104956, "grad_norm": 0.15912291407585144, "learning_rate": 1e-06, "loss": 0.0129, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0541294642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 747.5012817382812, "completions/mean_terminated_length": 555.8762817382812, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 7.606413994169096, "grad_norm": 0.17182904481887817, "learning_rate": 1e-06, "loss": -0.0195, "num_tokens": 435163883.0, "reward": 0.6413923501968384, "reward_std": 0.1491098254919052, "rewards/simpleverify_reward/mean": 0.6413922905921936, "rewards/simpleverify_reward/std": 0.4796084463596344, "step": 737 }, { "clip_ratio/high_max": 0.0014457745746767614, "clip_ratio/high_mean": 0.0005041719741711859, "clip_ratio/low_mean": 0.000516986077400361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010211580447503366, "epoch": 7.615743440233236, "grad_norm": 0.1484900861978531, "learning_rate": 1e-06, "loss": 0.0102, "step": 738 }, { "clip_ratio/high_max": 0.001829417629778618, "clip_ratio/high_mean": 0.0006925506231709733, "clip_ratio/low_mean": 0.00046197977690098924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011545303950697416, "epoch": 7.625072886297376, "grad_norm": 0.1570178121328354, "learning_rate": 1e-06, "loss": -0.0546, "step": 739 }, { "clip_ratio/high_max": 0.0018972965772263706, "clip_ratio/high_mean": 0.000712572546035517, "clip_ratio/low_mean": 0.000503135484905215, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012157080091128591, "epoch": 7.634402332361516, "grad_norm": 0.1515045017004013, "learning_rate": 1e-06, "loss": -0.0313, "step": 740 }, { "clip_ratio/high_max": 0.0017744814904290251, "clip_ratio/high_mean": 0.0006563313290826045, "clip_ratio/low_mean": 0.0005843502485731733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012406815694703255, "epoch": 7.643731778425656, "grad_norm": 0.3126060962677002, "learning_rate": 1e-06, "loss": -0.0203, "step": 741 }, { "clip_ratio/high_max": 0.001998363750317367, "clip_ratio/high_mean": 0.0007485660207748879, "clip_ratio/low_mean": 0.000542101221071789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001290667241846677, "epoch": 7.653061224489796, "grad_norm": 0.1534768044948578, "learning_rate": 1e-06, "loss": -0.04, "step": 742 }, { "clip_ratio/high_max": 0.0016331719798472477, "clip_ratio/high_mean": 0.0006074894336052239, "clip_ratio/low_mean": 0.0004559521380542719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010634415702952538, "epoch": 7.662390670553936, "grad_norm": 0.15502174198627472, "learning_rate": 1e-06, "loss": -0.0199, "step": 743 }, { "clip_ratio/high_max": 0.001954210594703909, "clip_ratio/high_mean": 0.0007720812773186481, "clip_ratio/low_mean": 0.0005317488848959329, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013038301767664962, "epoch": 7.671720116618076, "grad_norm": 0.14230623841285706, "learning_rate": 1e-06, "loss": -0.0398, "step": 744 }, { "clip_ratio/high_max": 0.0024696952241356485, "clip_ratio/high_mean": 0.000904272234038217, "clip_ratio/low_mean": 0.0007172481027737376, "clip_ratio/low_min": 3.760020626941696e-05, "clip_ratio/region_mean": 0.0016215203395404387, "epoch": 7.681049562682215, "grad_norm": 0.16257035732269287, "learning_rate": 1e-06, "loss": -0.0218, "step": 745 }, { "clip_ratio/high_max": 0.002157308772439137, "clip_ratio/high_mean": 0.0008278376026282785, "clip_ratio/low_mean": 0.0007871254438214237, "clip_ratio/low_min": 3.831222056760453e-05, "clip_ratio/region_mean": 0.0016149630391737446, "epoch": 7.690379008746356, "grad_norm": 0.15805956721305847, "learning_rate": 1e-06, "loss": -0.0044, "step": 746 }, { "clip_ratio/high_max": 0.0021105510386405513, "clip_ratio/high_mean": 0.0007840776816010475, "clip_ratio/low_mean": 0.0006762130888091633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014602907685912214, "epoch": 7.699708454810495, "grad_norm": 0.1527320295572281, "learning_rate": 1e-06, "loss": -0.0107, "step": 747 }, { "clip_ratio/high_max": 0.002107041687850142, "clip_ratio/high_mean": 0.0008245702047133818, "clip_ratio/low_mean": 0.0007649929702893132, "clip_ratio/low_min": 2.4625858713989146e-05, "clip_ratio/region_mean": 0.0015895632095634937, "epoch": 7.709037900874636, "grad_norm": 0.1556704044342041, "learning_rate": 1e-06, "loss": 0.0154, "step": 748 }, { "clip_ratio/high_max": 0.0021903735505475197, "clip_ratio/high_mean": 0.0008869410248735221, "clip_ratio/low_mean": 0.0007292457048606593, "clip_ratio/low_min": 3.8485221011796966e-05, "clip_ratio/region_mean": 0.0016161867170012556, "epoch": 7.718367346938775, "grad_norm": 0.1845226287841797, "learning_rate": 1e-06, "loss": -0.0425, "step": 749 }, { "clip_ratio/high_max": 0.0019948104381910525, "clip_ratio/high_mean": 0.0007574905121146003, "clip_ratio/low_mean": 0.0008261420953203924, "clip_ratio/low_min": 5.527332723431755e-05, "clip_ratio/region_mean": 0.001583632631081855, "epoch": 7.727696793002916, "grad_norm": 0.14994724094867706, "learning_rate": 1e-06, "loss": -0.0377, "step": 750 }, { "clip_ratio/high_max": 0.0020617368863895535, "clip_ratio/high_mean": 0.0008751375917199766, "clip_ratio/low_mean": 0.0009668973561929306, "clip_ratio/low_min": 2.5997881493822206e-05, "clip_ratio/region_mean": 0.001842034987930674, "epoch": 7.737026239067055, "grad_norm": 0.1462666094303131, "learning_rate": 1e-06, "loss": -0.0114, "step": 751 }, { "clip_ratio/high_max": 0.002463239310600329, "clip_ratio/high_mean": 0.0009523849166725995, "clip_ratio/low_mean": 0.0009486499075137544, "clip_ratio/low_min": 9.164732364297379e-05, "clip_ratio/region_mean": 0.0019010348478332162, "epoch": 7.746355685131196, "grad_norm": 0.1803591102361679, "learning_rate": 1e-06, "loss": -0.0098, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 731.5623168945312, "completions/mean_terminated_length": 545.3095703125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 7.755685131195335, "grad_norm": 0.16945196688175201, "learning_rate": 1e-06, "loss": -0.0147, "num_tokens": 444015912.0, "reward": 0.6492048501968384, "reward_std": 0.14588765799999237, "rewards/simpleverify_reward/mean": 0.6492047905921936, "rewards/simpleverify_reward/std": 0.4772355556488037, "step": 753 }, { "clip_ratio/high_max": 0.0017136082278739195, "clip_ratio/high_mean": 0.0006045274585630978, "clip_ratio/low_mean": 0.0004350928502390161, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010396203288109973, "epoch": 7.765014577259475, "grad_norm": 0.164473295211792, "learning_rate": 1e-06, "loss": -0.0356, "step": 754 }, { "clip_ratio/high_max": 0.0016943578448262997, "clip_ratio/high_mean": 0.0005978751469228882, "clip_ratio/low_mean": 0.000612183363045915, "clip_ratio/low_min": 4.460720265342388e-05, "clip_ratio/region_mean": 0.0012100584899599198, "epoch": 7.774344023323615, "grad_norm": 0.15339191257953644, "learning_rate": 1e-06, "loss": 0.0291, "step": 755 }, { "clip_ratio/high_max": 0.0017550014745211229, "clip_ratio/high_mean": 0.0006695338843201171, "clip_ratio/low_mean": 0.0005363055588532006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012058394167979714, "epoch": 7.783673469387755, "grad_norm": 0.14480164647102356, "learning_rate": 1e-06, "loss": -0.0565, "step": 756 }, { "clip_ratio/high_max": 0.0016073643091658596, "clip_ratio/high_mean": 0.0005797947942483006, "clip_ratio/low_mean": 0.000624498919933103, "clip_ratio/low_min": 2.1215206288616173e-05, "clip_ratio/region_mean": 0.0012042937305523083, "epoch": 7.793002915451895, "grad_norm": 0.16761000454425812, "learning_rate": 1e-06, "loss": 0.0169, "step": 757 }, { "clip_ratio/high_max": 0.0015867841721046716, "clip_ratio/high_mean": 0.0006775676210963866, "clip_ratio/low_mean": 0.0004238538666641034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011014214833267033, "epoch": 7.802332361516035, "grad_norm": 0.16307617723941803, "learning_rate": 1e-06, "loss": -0.0115, "step": 758 }, { "clip_ratio/high_max": 0.001662625145399943, "clip_ratio/high_mean": 0.000599279475864023, "clip_ratio/low_mean": 0.0006780094918212853, "clip_ratio/low_min": 1.253761274710996e-05, "clip_ratio/region_mean": 0.00127728899315116, "epoch": 7.811661807580175, "grad_norm": 0.16190168261528015, "learning_rate": 1e-06, "loss": 0.0134, "step": 759 }, { "clip_ratio/high_max": 0.0022277977695921436, "clip_ratio/high_mean": 0.000824742428449099, "clip_ratio/low_mean": 0.0005351803565645241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013599227750091814, "epoch": 7.820991253644315, "grad_norm": 0.13004447519779205, "learning_rate": 1e-06, "loss": -0.0547, "step": 760 }, { "clip_ratio/high_max": 0.0016835303758853115, "clip_ratio/high_mean": 0.0006072947344364366, "clip_ratio/low_mean": 0.0006936543904885184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013009491194679867, "epoch": 7.830320699708455, "grad_norm": 0.14409464597702026, "learning_rate": 1e-06, "loss": 0.0016, "step": 761 }, { "clip_ratio/high_max": 0.0020697440049843863, "clip_ratio/high_mean": 0.000701913528246223, "clip_ratio/low_mean": 0.0007140833195080631, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014159968777676113, "epoch": 7.839650145772595, "grad_norm": 0.1486106514930725, "learning_rate": 1e-06, "loss": -0.0089, "step": 762 }, { "clip_ratio/high_max": 0.0020737498343805782, "clip_ratio/high_mean": 0.0008258014131570235, "clip_ratio/low_mean": 0.0006301573716882558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014559588562406134, "epoch": 7.848979591836734, "grad_norm": 0.15257196128368378, "learning_rate": 1e-06, "loss": -0.0244, "step": 763 }, { "clip_ratio/high_max": 0.002147973580576945, "clip_ratio/high_mean": 0.0007751158209430287, "clip_ratio/low_mean": 0.0006247279629860714, "clip_ratio/low_min": 1.5375153452623636e-05, "clip_ratio/region_mean": 0.0013998438116686884, "epoch": 7.858309037900875, "grad_norm": 0.17857372760772705, "learning_rate": 1e-06, "loss": -0.0287, "step": 764 }, { "clip_ratio/high_max": 0.001956309592060279, "clip_ratio/high_mean": 0.0008469312779197935, "clip_ratio/low_mean": 0.0007374253736998071, "clip_ratio/low_min": 1.2317697837715968e-05, "clip_ratio/region_mean": 0.0015843566943658516, "epoch": 7.867638483965014, "grad_norm": 0.1591649204492569, "learning_rate": 1e-06, "loss": -0.0419, "step": 765 }, { "clip_ratio/high_max": 0.0019306747344671749, "clip_ratio/high_mean": 0.000812383410448092, "clip_ratio/low_mean": 0.0006838268718638574, "clip_ratio/low_min": 2.0325203877291642e-05, "clip_ratio/region_mean": 0.0014962102723075077, "epoch": 7.876967930029155, "grad_norm": 0.19091317057609558, "learning_rate": 1e-06, "loss": -0.046, "step": 766 }, { "clip_ratio/high_max": 0.0024851879497873597, "clip_ratio/high_mean": 0.0009366741851408733, "clip_ratio/low_mean": 0.0006878126932861051, "clip_ratio/low_min": 1.4201317753759213e-05, "clip_ratio/region_mean": 0.001624486849323148, "epoch": 7.886297376093294, "grad_norm": 0.17621733248233795, "learning_rate": 1e-06, "loss": -0.0274, "step": 767 }, { "clip_ratio/high_max": 0.0021220238704700023, "clip_ratio/high_mean": 0.0008073208173300372, "clip_ratio/low_mean": 0.0007792545684424113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015865753666730598, "epoch": 7.895626822157435, "grad_norm": 0.13772836327552795, "learning_rate": 1e-06, "loss": -0.0203, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0606863839285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 774.1727905273438, "completions/mean_terminated_length": 559.5589599609375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 8.00932944606414, "grad_norm": 0.17763680219650269, "learning_rate": 1e-06, "loss": -0.0132, "num_tokens": 452983893.0, "reward": 0.6492048501968384, "reward_std": 0.1455410122871399, "rewards/simpleverify_reward/mean": 0.6492047905921936, "rewards/simpleverify_reward/std": 0.4772355854511261, "step": 769 }, { "clip_ratio/high_max": 0.0016451043302367907, "clip_ratio/high_mean": 0.0006152750047476729, "clip_ratio/low_mean": 0.00041967516608565347, "clip_ratio/low_min": 1.3845812645740807e-05, "clip_ratio/region_mean": 0.0010349501608288847, "epoch": 8.018658892128279, "grad_norm": 0.14779852330684662, "learning_rate": 1e-06, "loss": -0.0325, "step": 770 }, { "clip_ratio/high_max": 0.0015523447436862625, "clip_ratio/high_mean": 0.0006910427018738119, "clip_ratio/low_mean": 0.00047160092981357593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011626436335063772, "epoch": 8.02798833819242, "grad_norm": 0.17682591080665588, "learning_rate": 1e-06, "loss": -0.015, "step": 771 }, { "clip_ratio/high_max": 0.0018720533298619557, "clip_ratio/high_mean": 0.0006658711145064444, "clip_ratio/low_mean": 0.0005458785726659698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012117496662540361, "epoch": 8.03731778425656, "grad_norm": 0.20374347269535065, "learning_rate": 1e-06, "loss": -0.0309, "step": 772 }, { "clip_ratio/high_max": 0.0020375495951157063, "clip_ratio/high_mean": 0.0006962920451769605, "clip_ratio/low_mean": 0.0005292283235576178, "clip_ratio/low_min": 2.0064206182723865e-05, "clip_ratio/region_mean": 0.0012255203910171986, "epoch": 8.0466472303207, "grad_norm": 0.1951533704996109, "learning_rate": 1e-06, "loss": -0.0363, "step": 773 }, { "clip_ratio/high_max": 0.001912979543703841, "clip_ratio/high_mean": 0.0006728394928359194, "clip_ratio/low_mean": 0.0005187991823731863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011916386647499166, "epoch": 8.055976676384839, "grad_norm": 0.16141320765018463, "learning_rate": 1e-06, "loss": -0.014, "step": 774 }, { "clip_ratio/high_max": 0.001666647684032796, "clip_ratio/high_mean": 0.0006313522935670335, "clip_ratio/low_mean": 0.0004629749346349854, "clip_ratio/low_min": 3.1414929253514856e-05, "clip_ratio/region_mean": 0.0010943272209260613, "epoch": 8.06530612244898, "grad_norm": 0.16449318826198578, "learning_rate": 1e-06, "loss": 0.0042, "step": 775 }, { "clip_ratio/high_max": 0.0016347536766261328, "clip_ratio/high_mean": 0.0006668298683507601, "clip_ratio/low_mean": 0.0005899268735447549, "clip_ratio/low_min": 8.108459041977767e-06, "clip_ratio/region_mean": 0.0012567567428050097, "epoch": 8.07463556851312, "grad_norm": 0.17286139726638794, "learning_rate": 1e-06, "loss": -0.0242, "step": 776 }, { "clip_ratio/high_max": 0.002126971179677639, "clip_ratio/high_mean": 0.0007484647630917607, "clip_ratio/low_mean": 0.0005366430332287564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012851077954110224, "epoch": 8.08396501457726, "grad_norm": 0.12581463158130646, "learning_rate": 1e-06, "loss": -0.0365, "step": 777 }, { "clip_ratio/high_max": 0.0017436686612199992, "clip_ratio/high_mean": 0.0007636568270754651, "clip_ratio/low_mean": 0.0006392396326191374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014028964760655072, "epoch": 8.093294460641399, "grad_norm": 0.15798048675060272, "learning_rate": 1e-06, "loss": -0.0293, "step": 778 }, { "clip_ratio/high_max": 0.0022645428107352927, "clip_ratio/high_mean": 0.0009420232345291879, "clip_ratio/low_mean": 0.0006154864704512875, "clip_ratio/low_min": 9.162878995994106e-06, "clip_ratio/region_mean": 0.0015575097131659277, "epoch": 8.102623906705539, "grad_norm": 0.1851341426372528, "learning_rate": 1e-06, "loss": -0.058, "step": 779 }, { "clip_ratio/high_max": 0.002023224369622767, "clip_ratio/high_mean": 0.0007296509502339177, "clip_ratio/low_mean": 0.000731736162379093, "clip_ratio/low_min": 6.71191683068173e-05, "clip_ratio/region_mean": 0.0014613871171604842, "epoch": 8.11195335276968, "grad_norm": 0.1550803929567337, "learning_rate": 1e-06, "loss": -0.0111, "step": 780 }, { "clip_ratio/high_max": 0.0022724664559063967, "clip_ratio/high_mean": 0.0007899450465629343, "clip_ratio/low_mean": 0.0006546829908984364, "clip_ratio/low_min": 1.269035510631511e-05, "clip_ratio/region_mean": 0.0014446280620177276, "epoch": 8.12128279883382, "grad_norm": 0.1443229466676712, "learning_rate": 1e-06, "loss": -0.049, "step": 781 }, { "clip_ratio/high_max": 0.002367722634517122, "clip_ratio/high_mean": 0.0009163110353256343, "clip_ratio/low_mean": 0.0006495231100416277, "clip_ratio/low_min": 1.5210513083729893e-05, "clip_ratio/region_mean": 0.0015658341762900818, "epoch": 8.130612244897959, "grad_norm": 0.1522289216518402, "learning_rate": 1e-06, "loss": -0.0756, "step": 782 }, { "clip_ratio/high_max": 0.001881878557469463, "clip_ratio/high_mean": 0.000713566818376421, "clip_ratio/low_mean": 0.0006957232862987439, "clip_ratio/low_min": 2.378686986048706e-05, "clip_ratio/region_mean": 0.0014092901292315219, "epoch": 8.139941690962099, "grad_norm": 0.20835767686367035, "learning_rate": 1e-06, "loss": 0.0049, "step": 783 }, { "clip_ratio/high_max": 0.0025641350002842955, "clip_ratio/high_mean": 0.0009066196525964187, "clip_ratio/low_mean": 0.0008746927796892123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017813124431995675, "epoch": 8.14927113702624, "grad_norm": 0.17135974764823914, "learning_rate": 1e-06, "loss": 0.0243, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0599888392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 768.703369140625, "completions/mean_terminated_length": 556.3646850585938, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 8.15860058309038, "grad_norm": 0.16017691791057587, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 461948536.0, "reward": 0.6545061469078064, "reward_std": 0.14591239392757416, "rewards/simpleverify_reward/mean": 0.6545061469078064, "rewards/simpleverify_reward/std": 0.47554561495780945, "step": 785 }, { "clip_ratio/high_max": 0.0020416112238308415, "clip_ratio/high_mean": 0.0007356411942964769, "clip_ratio/low_mean": 0.0003256628594954236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010613040540192742, "epoch": 8.167930029154519, "grad_norm": 0.14196988940238953, "learning_rate": 1e-06, "loss": -0.0529, "step": 786 }, { "clip_ratio/high_max": 0.001935205113113625, "clip_ratio/high_mean": 0.000719147736162995, "clip_ratio/low_mean": 0.0004969158799212892, "clip_ratio/low_min": 2.2490105038741603e-05, "clip_ratio/region_mean": 0.00121606362881721, "epoch": 8.177259475218658, "grad_norm": 0.14480482041835785, "learning_rate": 1e-06, "loss": 0.0041, "step": 787 }, { "clip_ratio/high_max": 0.0018623155920067802, "clip_ratio/high_mean": 0.0006981149654166074, "clip_ratio/low_mean": 0.0003591430418055097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010572580249572638, "epoch": 8.186588921282798, "grad_norm": 0.15978367626667023, "learning_rate": 1e-06, "loss": -0.0384, "step": 788 }, { "clip_ratio/high_max": 0.0016970433862297796, "clip_ratio/high_mean": 0.000731179405192961, "clip_ratio/low_mean": 0.0004871332857874222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012183126927993726, "epoch": 8.19591836734694, "grad_norm": 0.19169653952121735, "learning_rate": 1e-06, "loss": -0.0182, "step": 789 }, { "clip_ratio/high_max": 0.0014989302435424179, "clip_ratio/high_mean": 0.0006750638949597487, "clip_ratio/low_mean": 0.0005189528301343671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011940167350985575, "epoch": 8.205247813411079, "grad_norm": 0.14523930847644806, "learning_rate": 1e-06, "loss": -0.0172, "step": 790 }, { "clip_ratio/high_max": 0.001851722423452884, "clip_ratio/high_mean": 0.0006668795704172226, "clip_ratio/low_mean": 0.000525768477928068, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011926480292459019, "epoch": 8.214577259475218, "grad_norm": 0.16181409358978271, "learning_rate": 1e-06, "loss": -0.0342, "step": 791 }, { "clip_ratio/high_max": 0.0017426312115276232, "clip_ratio/high_mean": 0.0006708648479616386, "clip_ratio/low_mean": 0.0006199874351295875, "clip_ratio/low_min": 3.499742342683021e-05, "clip_ratio/region_mean": 0.0012908523021906149, "epoch": 8.223906705539358, "grad_norm": 0.15695470571517944, "learning_rate": 1e-06, "loss": 0.0018, "step": 792 }, { "clip_ratio/high_max": 0.001957222291821381, "clip_ratio/high_mean": 0.0007615907034050906, "clip_ratio/low_mean": 0.0005893878187634982, "clip_ratio/low_min": 2.9790277039865032e-05, "clip_ratio/region_mean": 0.0013509784839698114, "epoch": 8.2332361516035, "grad_norm": 0.1481734663248062, "learning_rate": 1e-06, "loss": -0.0162, "step": 793 }, { "clip_ratio/high_max": 0.0021822622147738002, "clip_ratio/high_mean": 0.0007458421951014316, "clip_ratio/low_mean": 0.0006068831125958241, "clip_ratio/low_min": 1.8200349586550146e-05, "clip_ratio/region_mean": 0.0013527253286156338, "epoch": 8.242565597667639, "grad_norm": 0.14205248653888702, "learning_rate": 1e-06, "loss": -0.0217, "step": 794 }, { "clip_ratio/high_max": 0.0019490581544232555, "clip_ratio/high_mean": 0.000809789195045596, "clip_ratio/low_mean": 0.000553276606297004, "clip_ratio/low_min": 2.4276558178826235e-05, "clip_ratio/region_mean": 0.0013630658213514835, "epoch": 8.251895043731778, "grad_norm": 0.13818272948265076, "learning_rate": 1e-06, "loss": -0.0489, "step": 795 }, { "clip_ratio/high_max": 0.0017627268462092616, "clip_ratio/high_mean": 0.000764463805808191, "clip_ratio/low_mean": 0.0006315709770206013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013960348223918118, "epoch": 8.261224489795918, "grad_norm": 0.17026136815547943, "learning_rate": 1e-06, "loss": -0.0243, "step": 796 }, { "clip_ratio/high_max": 0.002669161454832647, "clip_ratio/high_mean": 0.000994614763840218, "clip_ratio/low_mean": 0.0005275646062727901, "clip_ratio/low_min": 1.5103914847713895e-05, "clip_ratio/region_mean": 0.0015221794092212804, "epoch": 8.270553935860057, "grad_norm": 0.14741864800453186, "learning_rate": 1e-06, "loss": -0.0532, "step": 797 }, { "clip_ratio/high_max": 0.0021660119527950883, "clip_ratio/high_mean": 0.0008313395665027201, "clip_ratio/low_mean": 0.0006300538052528282, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014613933672080748, "epoch": 8.279883381924199, "grad_norm": 0.13700611889362335, "learning_rate": 1e-06, "loss": -0.024, "step": 798 }, { "clip_ratio/high_max": 0.0021038115555711556, "clip_ratio/high_mean": 0.0008208367071347311, "clip_ratio/low_mean": 0.0007182831122918287, "clip_ratio/low_min": 4.762509706779383e-05, "clip_ratio/region_mean": 0.0015391198176075704, "epoch": 8.289212827988338, "grad_norm": 0.1732536107301712, "learning_rate": 1e-06, "loss": -0.0001, "step": 799 }, { "clip_ratio/high_max": 0.002179421644541435, "clip_ratio/high_mean": 0.0008064037810981972, "clip_ratio/low_mean": 0.0006399707535820198, "clip_ratio/low_min": 1.4404240573639981e-05, "clip_ratio/region_mean": 0.001446374550141627, "epoch": 8.298542274052478, "grad_norm": 0.4844701588153839, "learning_rate": 1e-06, "loss": -0.0148, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625697544642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 758.1707763671875, "completions/mean_terminated_length": 535.3839111328125, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 7.485131195335277, "frac_reward_zero_std": 0.676339328289032, "grad_norm": 0.16048765182495117, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 470543584.0, "reward": 0.6644113063812256, "reward_std": 0.14094272255897522, "rewards/simpleverify_reward/mean": 0.6644112467765808, "rewards/simpleverify_reward/std": 0.4722123146057129, "step": 801 }, { "clip_ratio/high_max": 0.002057202575088013, "clip_ratio/high_mean": 0.000732836999304709, "clip_ratio/low_mean": 0.00040515164073440246, "clip_ratio/low_min": 4.072409501532093e-05, "clip_ratio/region_mean": 0.0011379886491340585, "epoch": 7.494460641399417, "grad_norm": 0.14790748059749603, "learning_rate": 1e-06, "loss": -0.0201, "step": 802 }, { "clip_ratio/high_max": 0.0016836167360452237, "clip_ratio/high_mean": 0.0006843132500762295, "clip_ratio/low_mean": 0.0004880198339378694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011723330826498568, "epoch": 7.503790087463557, "grad_norm": 0.19162651896476746, "learning_rate": 1e-06, "loss": 0.009, "step": 803 }, { "clip_ratio/high_max": 0.0015355879404523876, "clip_ratio/high_mean": 0.0006536783375850064, "clip_ratio/low_mean": 0.0004854880226048408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011391663501854055, "epoch": 7.513119533527696, "grad_norm": 0.17766648530960083, "learning_rate": 1e-06, "loss": -0.002, "step": 804 }, { "clip_ratio/high_max": 0.0018136436228815, "clip_ratio/high_mean": 0.0006961762301216368, "clip_ratio/low_mean": 0.0005706911488232436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012668673880398273, "epoch": 7.522448979591837, "grad_norm": 0.1565045565366745, "learning_rate": 1e-06, "loss": -0.0214, "step": 805 }, { "clip_ratio/high_max": 0.0016966407638392411, "clip_ratio/high_mean": 0.0006741133365721907, "clip_ratio/low_mean": 0.0005614269748548395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012355403086985461, "epoch": 7.531778425655976, "grad_norm": 0.19917000830173492, "learning_rate": 1e-06, "loss": 0.0229, "step": 806 }, { "clip_ratio/high_max": 0.0018831934939953499, "clip_ratio/high_mean": 0.0007403286626868066, "clip_ratio/low_mean": 0.0005736498387705069, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013139784714439884, "epoch": 7.541107871720117, "grad_norm": 0.1700940877199173, "learning_rate": 1e-06, "loss": -0.0156, "step": 807 }, { "clip_ratio/high_max": 0.0019333282252773643, "clip_ratio/high_mean": 0.0007411165279336274, "clip_ratio/low_mean": 0.000568046099260755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013091626206005458, "epoch": 7.550437317784256, "grad_norm": 0.15023234486579895, "learning_rate": 1e-06, "loss": 0.0018, "step": 808 }, { "clip_ratio/high_max": 0.0019189989689039066, "clip_ratio/high_mean": 0.000791374581240234, "clip_ratio/low_mean": 0.000665124120132532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014564986959157977, "epoch": 7.559766763848397, "grad_norm": 0.1674487143754959, "learning_rate": 1e-06, "loss": -0.0447, "step": 809 }, { "clip_ratio/high_max": 0.0018368272176303435, "clip_ratio/high_mean": 0.0007714728235441726, "clip_ratio/low_mean": 0.0005951353841737728, "clip_ratio/low_min": 7.766869202896487e-06, "clip_ratio/region_mean": 0.0013666081686096732, "epoch": 7.569096209912536, "grad_norm": 0.1545816957950592, "learning_rate": 1e-06, "loss": -0.0552, "step": 810 }, { "clip_ratio/high_max": 0.002220380152721191, "clip_ratio/high_mean": 0.0008754595837672241, "clip_ratio/low_mean": 0.0006207837177498732, "clip_ratio/low_min": 2.8237951482878998e-05, "clip_ratio/region_mean": 0.0014962432906031609, "epoch": 7.578425655976677, "grad_norm": 0.20020872354507446, "learning_rate": 1e-06, "loss": -0.0252, "step": 811 }, { "clip_ratio/high_max": 0.0024653645741636865, "clip_ratio/high_mean": 0.0009719052741274936, "clip_ratio/low_mean": 0.0006411510662474029, "clip_ratio/low_min": 3.4645232517505065e-05, "clip_ratio/region_mean": 0.0016130563817569055, "epoch": 7.587755102040816, "grad_norm": 0.16022798418998718, "learning_rate": 1e-06, "loss": -0.041, "step": 812 }, { "clip_ratio/high_max": 0.0020816557516809553, "clip_ratio/high_mean": 0.0007668948646823992, "clip_ratio/low_mean": 0.0008131178947223816, "clip_ratio/low_min": 1.537893695058301e-05, "clip_ratio/region_mean": 0.001580012758495286, "epoch": 7.597084548104956, "grad_norm": 0.19745729863643646, "learning_rate": 1e-06, "loss": 0.0212, "step": 813 }, { "clip_ratio/high_max": 0.0027655405865516514, "clip_ratio/high_mean": 0.000985359503829386, "clip_ratio/low_mean": 0.000574123159822193, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001559482672746526, "epoch": 7.606413994169096, "grad_norm": 0.14873485267162323, "learning_rate": 1e-06, "loss": -0.0661, "step": 814 }, { "clip_ratio/high_max": 0.0021875521342735738, "clip_ratio/high_mean": 0.0008317012416227954, "clip_ratio/low_mean": 0.0006571313606400508, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014888326222717296, "epoch": 7.615743440233236, "grad_norm": 0.18904799222946167, "learning_rate": 1e-06, "loss": -0.023, "step": 815 }, { "clip_ratio/high_max": 0.0020969071847503074, "clip_ratio/high_mean": 0.0008450360255665146, "clip_ratio/low_mean": 0.0007518708243878791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015969068226695526, "epoch": 7.625072886297376, "grad_norm": 0.15184339880943298, "learning_rate": 1e-06, "loss": -0.009, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0633370535714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 784.9634399414062, "completions/mean_terminated_length": 561.0714111328125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 7.634402332361516, "frac_reward_zero_std": 0.6579241156578064, "grad_norm": 0.14981989562511444, "learning_rate": 1e-06, "loss": -0.0324, "num_tokens": 479552051.0, "reward": 0.6510184407234192, "reward_std": 0.1513964682817459, "rewards/simpleverify_reward/mean": 0.6510184407234192, "rewards/simpleverify_reward/std": 0.4766647517681122, "step": 817 }, { "clip_ratio/high_max": 0.0017694023990770802, "clip_ratio/high_mean": 0.0006254221871131449, "clip_ratio/low_mean": 0.00041877368767018197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010441958875162527, "epoch": 7.643731778425656, "grad_norm": 0.1406858265399933, "learning_rate": 1e-06, "loss": -0.0215, "step": 818 }, { "clip_ratio/high_max": 0.0016617134788248222, "clip_ratio/high_mean": 0.0006456297542172251, "clip_ratio/low_mean": 0.0004748181336253765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001120447868743213, "epoch": 7.653061224489796, "grad_norm": 0.15612372756004333, "learning_rate": 1e-06, "loss": 0.0006, "step": 819 }, { "clip_ratio/high_max": 0.0016346989659723477, "clip_ratio/high_mean": 0.0005800592334708199, "clip_ratio/low_mean": 0.0004541078387774178, "clip_ratio/low_min": 2.1477662812685594e-05, "clip_ratio/region_mean": 0.0010341670567868277, "epoch": 7.662390670553936, "grad_norm": 0.16599968075752258, "learning_rate": 1e-06, "loss": 0.0073, "step": 820 }, { "clip_ratio/high_max": 0.0019647065237222705, "clip_ratio/high_mean": 0.0007543004321632907, "clip_ratio/low_mean": 0.0005936643246968742, "clip_ratio/low_min": 1.9797276763711125e-05, "clip_ratio/region_mean": 0.0013479647423082497, "epoch": 7.671720116618076, "grad_norm": 0.18290185928344727, "learning_rate": 1e-06, "loss": -0.018, "step": 821 }, { "clip_ratio/high_max": 0.0018077194472425617, "clip_ratio/high_mean": 0.0007580148885608651, "clip_ratio/low_mean": 0.0005482257834046322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001306240692429128, "epoch": 7.681049562682215, "grad_norm": 0.14898940920829773, "learning_rate": 1e-06, "loss": -0.0466, "step": 822 }, { "clip_ratio/high_max": 0.0016052475402830169, "clip_ratio/high_mean": 0.0007113271176422131, "clip_ratio/low_mean": 0.0005004018903491669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001211728998896433, "epoch": 7.690379008746356, "grad_norm": 0.17639468610286713, "learning_rate": 1e-06, "loss": -0.0164, "step": 823 }, { "clip_ratio/high_max": 0.0017938724377017934, "clip_ratio/high_mean": 0.0007730451816314599, "clip_ratio/low_mean": 0.0005875573233424802, "clip_ratio/low_min": 1.4962892237235792e-05, "clip_ratio/region_mean": 0.0013606025167973712, "epoch": 7.699708454810495, "grad_norm": 0.180489644408226, "learning_rate": 1e-06, "loss": -0.0636, "step": 824 }, { "clip_ratio/high_max": 0.0023506539437221363, "clip_ratio/high_mean": 0.0008965894903667504, "clip_ratio/low_mean": 0.0005891657460779243, "clip_ratio/low_min": 1.676951978879515e-05, "clip_ratio/region_mean": 0.001485755230532959, "epoch": 7.709037900874636, "grad_norm": 0.17546361684799194, "learning_rate": 1e-06, "loss": -0.0503, "step": 825 }, { "clip_ratio/high_max": 0.0019969902423326857, "clip_ratio/high_mean": 0.0007845439313314273, "clip_ratio/low_mean": 0.000580653394536057, "clip_ratio/low_min": 1.711859840725083e-05, "clip_ratio/region_mean": 0.0013651973276864737, "epoch": 7.718367346938775, "grad_norm": 0.1548464596271515, "learning_rate": 1e-06, "loss": -0.0329, "step": 826 }, { "clip_ratio/high_max": 0.0022569821048818994, "clip_ratio/high_mean": 0.0009278337038267637, "clip_ratio/low_mean": 0.0004645756707759574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013924093582318164, "epoch": 7.727696793002916, "grad_norm": 0.14402812719345093, "learning_rate": 1e-06, "loss": -0.0447, "step": 827 }, { "clip_ratio/high_max": 0.0021144789425306953, "clip_ratio/high_mean": 0.000795158906839788, "clip_ratio/low_mean": 0.0007951401839818573, "clip_ratio/low_min": 4.470267413125839e-05, "clip_ratio/region_mean": 0.0015902991144685075, "epoch": 7.737026239067055, "grad_norm": 0.17861518263816833, "learning_rate": 1e-06, "loss": 0.0015, "step": 828 }, { "clip_ratio/high_max": 0.0020589096675394103, "clip_ratio/high_mean": 0.0007788707262079697, "clip_ratio/low_mean": 0.0006027170275046956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001381587760988623, "epoch": 7.746355685131196, "grad_norm": 0.14513832330703735, "learning_rate": 1e-06, "loss": -0.0018, "step": 829 }, { "clip_ratio/high_max": 0.002188673614000436, "clip_ratio/high_mean": 0.0008243337342719315, "clip_ratio/low_mean": 0.0006482751978182932, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014726089575560763, "epoch": 7.755685131195335, "grad_norm": 0.1447298526763916, "learning_rate": 1e-06, "loss": -0.0367, "step": 830 }, { "clip_ratio/high_max": 0.0020075662723684218, "clip_ratio/high_mean": 0.000736988718927023, "clip_ratio/low_mean": 0.0007066194420985994, "clip_ratio/low_min": 4.4641867134487256e-05, "clip_ratio/region_mean": 0.0014436081692110747, "epoch": 7.765014577259475, "grad_norm": 0.1822243481874466, "learning_rate": 1e-06, "loss": 0.0003, "step": 831 }, { "clip_ratio/high_max": 0.00229170655074995, "clip_ratio/high_mean": 0.000857899549373542, "clip_ratio/low_mean": 0.0005775272893515648, "clip_ratio/low_min": 4.465083111426793e-05, "clip_ratio/region_mean": 0.001435426853277022, "epoch": 7.774344023323615, "grad_norm": 0.17902734875679016, "learning_rate": 1e-06, "loss": -0.0508, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0537806919642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 747.1600341796875, "completions/mean_terminated_length": 556.8204956054688, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 7.783673469387755, "frac_reward_zero_std": 0.6897321939468384, "grad_norm": 0.1577620953321457, "learning_rate": 1e-06, "loss": -0.0187, "num_tokens": 488523145.0, "reward": 0.6656668782234192, "reward_std": 0.13490019738674164, "rewards/simpleverify_reward/mean": 0.6656668782234192, "rewards/simpleverify_reward/std": 0.47177326679229736, "step": 833 }, { "clip_ratio/high_max": 0.0015099598240340129, "clip_ratio/high_mean": 0.0005884524898647214, "clip_ratio/low_mean": 0.00042052371782119735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010089762217830867, "epoch": 7.793002915451895, "grad_norm": 0.23535661399364471, "learning_rate": 1e-06, "loss": -0.0275, "step": 834 }, { "clip_ratio/high_max": 0.0018599990144139156, "clip_ratio/high_mean": 0.0007026594448689139, "clip_ratio/low_mean": 0.00044787020533476607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011505296461109538, "epoch": 7.802332361516035, "grad_norm": 0.15710200369358063, "learning_rate": 1e-06, "loss": -0.0417, "step": 835 }, { "clip_ratio/high_max": 0.0016648262098897249, "clip_ratio/high_mean": 0.0006166650109662442, "clip_ratio/low_mean": 0.0003806344320764765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000997299466689583, "epoch": 7.811661807580175, "grad_norm": 0.14816677570343018, "learning_rate": 1e-06, "loss": -0.0257, "step": 836 }, { "clip_ratio/high_max": 0.001609447157534305, "clip_ratio/high_mean": 0.0007168358642957173, "clip_ratio/low_mean": 0.0004171452019363642, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011339810735080391, "epoch": 7.820991253644315, "grad_norm": 0.14706405997276306, "learning_rate": 1e-06, "loss": -0.0404, "step": 837 }, { "clip_ratio/high_max": 0.0016157161844603252, "clip_ratio/high_mean": 0.0005893084198760334, "clip_ratio/low_mean": 0.0004027373761346098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000992045781458728, "epoch": 7.830320699708455, "grad_norm": 0.16877339780330658, "learning_rate": 1e-06, "loss": -0.02, "step": 838 }, { "clip_ratio/high_max": 0.0014973306879255688, "clip_ratio/high_mean": 0.0005286970053930418, "clip_ratio/low_mean": 0.0005088314878776146, "clip_ratio/low_min": 9.58736018219497e-06, "clip_ratio/region_mean": 0.0010375285128247924, "epoch": 7.839650145772595, "grad_norm": 0.15214215219020844, "learning_rate": 1e-06, "loss": 0.0326, "step": 839 }, { "clip_ratio/high_max": 0.001831694069551304, "clip_ratio/high_mean": 0.0006582613013961236, "clip_ratio/low_mean": 0.00048169033561862307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011399516624805983, "epoch": 7.848979591836734, "grad_norm": 0.1726624071598053, "learning_rate": 1e-06, "loss": 0.0028, "step": 840 }, { "clip_ratio/high_max": 0.002091318594466429, "clip_ratio/high_mean": 0.0007338797986449208, "clip_ratio/low_mean": 0.0005320067093634862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012658864943659864, "epoch": 7.858309037900875, "grad_norm": 0.19995518028736115, "learning_rate": 1e-06, "loss": -0.0162, "step": 841 }, { "clip_ratio/high_max": 0.0016689515468897298, "clip_ratio/high_mean": 0.0006294758632066078, "clip_ratio/low_mean": 0.0005013959239477117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011308717912470456, "epoch": 7.867638483965014, "grad_norm": 0.19889438152313232, "learning_rate": 1e-06, "loss": -0.0473, "step": 842 }, { "clip_ratio/high_max": 0.0014512951456708834, "clip_ratio/high_mean": 0.0006383156942320056, "clip_ratio/low_mean": 0.0005579863509410643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011963020551775116, "epoch": 7.876967930029155, "grad_norm": 0.17005623877048492, "learning_rate": 1e-06, "loss": -0.0209, "step": 843 }, { "clip_ratio/high_max": 0.0022733917212462984, "clip_ratio/high_mean": 0.000823682630652911, "clip_ratio/low_mean": 0.0006103636142142932, "clip_ratio/low_min": 1.1371907021384686e-05, "clip_ratio/region_mean": 0.0014340462512336671, "epoch": 7.886297376093294, "grad_norm": 0.17048496007919312, "learning_rate": 1e-06, "loss": -0.0223, "step": 844 }, { "clip_ratio/high_max": 0.0021309400253812782, "clip_ratio/high_mean": 0.0008312684512929991, "clip_ratio/low_mean": 0.0005948559319222113, "clip_ratio/low_min": 2.082292121485807e-05, "clip_ratio/region_mean": 0.0014261243923101574, "epoch": 7.895626822157435, "grad_norm": 0.1665126383304596, "learning_rate": 1e-06, "loss": -0.0647, "step": 845 }, { "clip_ratio/high_max": 0.0019101999641861767, "clip_ratio/high_mean": 0.0008238748832809506, "clip_ratio/low_mean": 0.0005560869951750647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013799619100609561, "epoch": 8.00932944606414, "grad_norm": 0.1582695096731186, "learning_rate": 1e-06, "loss": -0.0016, "step": 846 }, { "clip_ratio/high_max": 0.0018581174590508454, "clip_ratio/high_mean": 0.0007508363378292415, "clip_ratio/low_mean": 0.0007309021548280725, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014817384617344942, "epoch": 8.018658892128279, "grad_norm": 0.2810385227203369, "learning_rate": 1e-06, "loss": 0.0318, "step": 847 }, { "clip_ratio/high_max": 0.0019969653003499843, "clip_ratio/high_mean": 0.0008156164949468803, "clip_ratio/low_mean": 0.0007101256760506658, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015257421837304719, "epoch": 8.02798833819242, "grad_norm": 0.17291241884231567, "learning_rate": 1e-06, "loss": -0.0107, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0661969866071429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 788.8703002929688, "completions/mean_terminated_length": 554.4288940429688, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 8.03731778425656, "frac_reward_zero_std": 0.6707589626312256, "grad_norm": 0.17994125187397003, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 497397533.0, "reward": 0.6599470376968384, "reward_std": 0.1449192315340042, "rewards/simpleverify_reward/mean": 0.6599469780921936, "rewards/simpleverify_reward/std": 0.473743200302124, "step": 849 }, { "clip_ratio/high_max": 0.0016802004975033924, "clip_ratio/high_mean": 0.0005835280644532759, "clip_ratio/low_mean": 0.0004027642044093227, "clip_ratio/low_min": 1.1265320608799811e-05, "clip_ratio/region_mean": 0.0009862922452157363, "epoch": 8.0466472303207, "grad_norm": 0.1567344069480896, "learning_rate": 1e-06, "loss": -0.0356, "step": 850 }, { "clip_ratio/high_max": 0.0015269631330738775, "clip_ratio/high_mean": 0.0006541569709952455, "clip_ratio/low_mean": 0.00049364289316145, "clip_ratio/low_min": 1.5348723536590114e-05, "clip_ratio/region_mean": 0.0011477998814370949, "epoch": 8.055976676384839, "grad_norm": 0.165390744805336, "learning_rate": 1e-06, "loss": -0.0183, "step": 851 }, { "clip_ratio/high_max": 0.0017281611617363524, "clip_ratio/high_mean": 0.0006193411973072216, "clip_ratio/low_mean": 0.00040090768970912904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001020248862914741, "epoch": 8.06530612244898, "grad_norm": 0.14335423707962036, "learning_rate": 1e-06, "loss": -0.0112, "step": 852 }, { "clip_ratio/high_max": 0.0021292970268405043, "clip_ratio/high_mean": 0.0007428900125887594, "clip_ratio/low_mean": 0.00046850316766722244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001211393166158814, "epoch": 8.07463556851312, "grad_norm": 0.14712248742580414, "learning_rate": 1e-06, "loss": -0.0554, "step": 853 }, { "clip_ratio/high_max": 0.001586140730069019, "clip_ratio/high_mean": 0.0006178875437399256, "clip_ratio/low_mean": 0.00041052478900382994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010284123454766814, "epoch": 8.08396501457726, "grad_norm": 0.14856679737567902, "learning_rate": 1e-06, "loss": -0.0338, "step": 854 }, { "clip_ratio/high_max": 0.0019300887379358755, "clip_ratio/high_mean": 0.0007548224639322143, "clip_ratio/low_mean": 0.0004348579200268432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011896803880517837, "epoch": 8.093294460641399, "grad_norm": 0.16297124326229095, "learning_rate": 1e-06, "loss": -0.0157, "step": 855 }, { "clip_ratio/high_max": 0.0019167002283211332, "clip_ratio/high_mean": 0.0007637096114194719, "clip_ratio/low_mean": 0.0005692253143934067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00133293489125208, "epoch": 8.102623906705539, "grad_norm": 0.17099975049495697, "learning_rate": 1e-06, "loss": -0.0606, "step": 856 }, { "clip_ratio/high_max": 0.001731727330479771, "clip_ratio/high_mean": 0.000671222131131799, "clip_ratio/low_mean": 0.0005453717512864387, "clip_ratio/low_min": 1.3790820958092809e-05, "clip_ratio/region_mean": 0.0012165938969701529, "epoch": 8.11195335276968, "grad_norm": 0.17840510606765747, "learning_rate": 1e-06, "loss": -0.0237, "step": 857 }, { "clip_ratio/high_max": 0.0019022206179215573, "clip_ratio/high_mean": 0.0008061506550802733, "clip_ratio/low_mean": 0.0005049445835538791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013110952713759616, "epoch": 8.12128279883382, "grad_norm": 0.1815241128206253, "learning_rate": 1e-06, "loss": -0.027, "step": 858 }, { "clip_ratio/high_max": 0.0018990998760273214, "clip_ratio/high_mean": 0.0006350487637973856, "clip_ratio/low_mean": 0.0006538561065099202, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012889048703073058, "epoch": 8.130612244897959, "grad_norm": 0.15632136166095734, "learning_rate": 1e-06, "loss": 0.0292, "step": 859 }, { "clip_ratio/high_max": 0.002177164831664413, "clip_ratio/high_mean": 0.0008494076646456961, "clip_ratio/low_mean": 0.0005988074592551129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001448215149139287, "epoch": 8.139941690962099, "grad_norm": 0.1704251617193222, "learning_rate": 1e-06, "loss": -0.0485, "step": 860 }, { "clip_ratio/high_max": 0.0020736097721965052, "clip_ratio/high_mean": 0.0008957359714258928, "clip_ratio/low_mean": 0.0008154212237059255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017111572014982812, "epoch": 8.14927113702624, "grad_norm": 0.15005381405353546, "learning_rate": 1e-06, "loss": -0.036, "step": 861 }, { "clip_ratio/high_max": 0.0015928496322885621, "clip_ratio/high_mean": 0.00063810109077167, "clip_ratio/low_mean": 0.0005474657773447689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001185566852655029, "epoch": 8.15860058309038, "grad_norm": 0.256352037191391, "learning_rate": 1e-06, "loss": -0.0056, "step": 862 }, { "clip_ratio/high_max": 0.00197625724831596, "clip_ratio/high_mean": 0.000788267108873697, "clip_ratio/low_mean": 0.0007625742491654819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015508413343923166, "epoch": 8.167930029154519, "grad_norm": 0.15625400841236115, "learning_rate": 1e-06, "loss": -0.0319, "step": 863 }, { "clip_ratio/high_max": 0.0020140287997492123, "clip_ratio/high_mean": 0.0008538680449419189, "clip_ratio/low_mean": 0.0006968220968701644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001550690139993094, "epoch": 8.177259475218658, "grad_norm": 0.16428442299365997, "learning_rate": 1e-06, "loss": -0.0254, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0614536830357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3926.0, "completions/mean_length": 767.8736572265625, "completions/mean_terminated_length": 549.9561157226562, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 8.186588921282798, "frac_reward_zero_std": 0.6835938096046448, "grad_norm": 0.15470105409622192, "learning_rate": 1e-06, "loss": -0.0648, "num_tokens": 506219001.0, "reward": 0.666015625, "reward_std": 0.13839474320411682, "rewards/simpleverify_reward/mean": 0.666015625, "rewards/simpleverify_reward/std": 0.471650630235672, "step": 865 }, { "clip_ratio/high_max": 0.001371734295389615, "clip_ratio/high_mean": 0.00047420148075616453, "clip_ratio/low_mean": 0.00048567590874881716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009598773685866036, "epoch": 8.19591836734694, "grad_norm": 0.1735559105873108, "learning_rate": 1e-06, "loss": 0.0306, "step": 866 }, { "clip_ratio/high_max": 0.0018371979567746166, "clip_ratio/high_mean": 0.0006904399879203993, "clip_ratio/low_mean": 0.00041775503359531285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011081950397056062, "epoch": 8.205247813411079, "grad_norm": 0.14385294914245605, "learning_rate": 1e-06, "loss": -0.0236, "step": 867 }, { "clip_ratio/high_max": 0.0016952032292465447, "clip_ratio/high_mean": 0.0006520757369798957, "clip_ratio/low_mean": 0.0006199057670528418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001271981524041621, "epoch": 8.214577259475218, "grad_norm": 0.19405131042003632, "learning_rate": 1e-06, "loss": 0.0238, "step": 868 }, { "clip_ratio/high_max": 0.00150130831025308, "clip_ratio/high_mean": 0.0005590062273768126, "clip_ratio/low_mean": 0.0004664985099225305, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010255047127429862, "epoch": 8.223906705539358, "grad_norm": 0.128067284822464, "learning_rate": 1e-06, "loss": -0.0188, "step": 869 }, { "clip_ratio/high_max": 0.0014848813589196652, "clip_ratio/high_mean": 0.0004643759057216812, "clip_ratio/low_mean": 0.00043824011754622916, "clip_ratio/low_min": 1.3616558135254309e-05, "clip_ratio/region_mean": 0.0009026160187204368, "epoch": 8.2332361516035, "grad_norm": 0.1572960764169693, "learning_rate": 1e-06, "loss": -0.0398, "step": 870 }, { "clip_ratio/high_max": 0.0018887504302256275, "clip_ratio/high_mean": 0.0007234058884932892, "clip_ratio/low_mean": 0.000558296182362028, "clip_ratio/low_min": 3.728004594449885e-05, "clip_ratio/region_mean": 0.0012817020779039012, "epoch": 8.242565597667639, "grad_norm": 0.4015250504016876, "learning_rate": 1e-06, "loss": -0.0145, "step": 871 }, { "clip_ratio/high_max": 0.0017195759392052423, "clip_ratio/high_mean": 0.0006899598838572274, "clip_ratio/low_mean": 0.0004500488389567181, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011400087059882935, "epoch": 8.251895043731778, "grad_norm": 0.1486574411392212, "learning_rate": 1e-06, "loss": -0.0303, "step": 872 }, { "clip_ratio/high_max": 0.0018969704906339757, "clip_ratio/high_mean": 0.0006651459461863851, "clip_ratio/low_mean": 0.000606499153036566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00127164508739952, "epoch": 8.261224489795918, "grad_norm": 0.14896473288536072, "learning_rate": 1e-06, "loss": -0.0099, "step": 873 }, { "clip_ratio/high_max": 0.0022398939981940202, "clip_ratio/high_mean": 0.0008708387103979476, "clip_ratio/low_mean": 0.0006988972818362527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015697360577178188, "epoch": 8.270553935860057, "grad_norm": 0.1894596964120865, "learning_rate": 1e-06, "loss": -0.0716, "step": 874 }, { "clip_ratio/high_max": 0.0023601529428560752, "clip_ratio/high_mean": 0.0007892405556049198, "clip_ratio/low_mean": 0.0007252845343828085, "clip_ratio/low_min": 1.3718174159293994e-05, "clip_ratio/region_mean": 0.0015145250836212654, "epoch": 8.279883381924199, "grad_norm": 0.17327351868152618, "learning_rate": 1e-06, "loss": -0.0299, "step": 875 }, { "clip_ratio/high_max": 0.001868507020844845, "clip_ratio/high_mean": 0.0007616391194460448, "clip_ratio/low_mean": 0.0007078343060129555, "clip_ratio/low_min": 1.1015156815119553e-05, "clip_ratio/region_mean": 0.0014694734563818201, "epoch": 8.289212827988338, "grad_norm": 0.1700202077627182, "learning_rate": 1e-06, "loss": -0.0155, "step": 876 }, { "clip_ratio/high_max": 0.0019742977819987573, "clip_ratio/high_mean": 0.0007711689431744162, "clip_ratio/low_mean": 0.0006539527248605737, "clip_ratio/low_min": 2.6957084628520533e-05, "clip_ratio/region_mean": 0.0014251216489356011, "epoch": 8.298542274052478, "grad_norm": 0.1574561893939972, "learning_rate": 1e-06, "loss": -0.0074, "step": 877 }, { "clip_ratio/high_max": 0.0020974307553842664, "clip_ratio/high_mean": 0.0008505298483214574, "clip_ratio/low_mean": 0.0005603396257356508, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014108694631431717, "epoch": 8.307871720116617, "grad_norm": 0.2184046506881714, "learning_rate": 1e-06, "loss": -0.0534, "step": 878 }, { "clip_ratio/high_max": 0.0019127117957395967, "clip_ratio/high_mean": 0.0007680976646042836, "clip_ratio/low_mean": 0.0006792483734443522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001447346032364294, "epoch": 8.317201166180759, "grad_norm": 0.12927134335041046, "learning_rate": 1e-06, "loss": -0.0237, "step": 879 }, { "clip_ratio/high_max": 0.002046287183475215, "clip_ratio/high_mean": 0.0008172179041139316, "clip_ratio/low_mean": 0.0006078003079892369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014250181593524758, "epoch": 8.326530612244898, "grad_norm": 0.1542949378490448, "learning_rate": 1e-06, "loss": -0.0137, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0697544642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 801.2703247070312, "completions/mean_terminated_length": 554.2149658203125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 8.335860058309038, "frac_reward_zero_std": 0.688058078289032, "grad_norm": 0.15939128398895264, "learning_rate": 1e-06, "loss": -0.0304, "num_tokens": 515057924.0, "reward": 0.6520647406578064, "reward_std": 0.13734860718250275, "rewards/simpleverify_reward/mean": 0.6520647406578064, "rewards/simpleverify_reward/std": 0.4763319492340088, "step": 881 }, { "clip_ratio/high_max": 0.0013936792311142199, "clip_ratio/high_mean": 0.0005592919787886785, "clip_ratio/low_mean": 0.00043747481822720147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009967668011086062, "epoch": 8.345189504373177, "grad_norm": 0.16833184659481049, "learning_rate": 1e-06, "loss": -0.0379, "step": 882 }, { "clip_ratio/high_max": 0.0014789550441491883, "clip_ratio/high_mean": 0.0005644798375215032, "clip_ratio/low_mean": 0.00041244652447858243, "clip_ratio/low_min": 2.1807396478834562e-05, "clip_ratio/region_mean": 0.0009769263615453383, "epoch": 8.354518950437317, "grad_norm": 0.174534872174263, "learning_rate": 1e-06, "loss": -0.0099, "step": 883 }, { "clip_ratio/high_max": 0.0020360436574264895, "clip_ratio/high_mean": 0.0007078553244355135, "clip_ratio/low_mean": 0.000347487815815839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010553431420703419, "epoch": 8.363848396501458, "grad_norm": 0.1469399333000183, "learning_rate": 1e-06, "loss": -0.0672, "step": 884 }, { "clip_ratio/high_max": 0.0016586801211815327, "clip_ratio/high_mean": 0.0006491339609056013, "clip_ratio/low_mean": 0.00040488683498551836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010540207840676885, "epoch": 8.373177842565598, "grad_norm": 0.30360445380210876, "learning_rate": 1e-06, "loss": -0.0374, "step": 885 }, { "clip_ratio/high_max": 0.0019171807834936772, "clip_ratio/high_mean": 0.000661216901789885, "clip_ratio/low_mean": 0.0006423088889278006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001303525765251834, "epoch": 8.382507288629737, "grad_norm": 0.15910163521766663, "learning_rate": 1e-06, "loss": -0.0006, "step": 886 }, { "clip_ratio/high_max": 0.0017478661393397488, "clip_ratio/high_mean": 0.0007715442879998591, "clip_ratio/low_mean": 0.0005994841012579855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013710283892578445, "epoch": 8.391836734693877, "grad_norm": 0.14933861792087555, "learning_rate": 1e-06, "loss": -0.0409, "step": 887 }, { "clip_ratio/high_max": 0.0019714515619853046, "clip_ratio/high_mean": 0.0006781840747862589, "clip_ratio/low_mean": 0.0006105925376687082, "clip_ratio/low_min": 1.1632235327851959e-05, "clip_ratio/region_mean": 0.0012887766242783982, "epoch": 8.401166180758018, "grad_norm": 0.1691121757030487, "learning_rate": 1e-06, "loss": 0.0584, "step": 888 }, { "clip_ratio/high_max": 0.0018821779485733714, "clip_ratio/high_mean": 0.0006420242298190715, "clip_ratio/low_mean": 0.0006397874367394252, "clip_ratio/low_min": 1.5952016838127747e-05, "clip_ratio/region_mean": 0.0012818117029382847, "epoch": 8.410495626822158, "grad_norm": 0.1788662075996399, "learning_rate": 1e-06, "loss": 0.0074, "step": 889 }, { "clip_ratio/high_max": 0.0019681268931890372, "clip_ratio/high_mean": 0.0006869856115372386, "clip_ratio/low_mean": 0.0006274421475609415, "clip_ratio/low_min": 1.3050741472397931e-05, "clip_ratio/region_mean": 0.001314427765464643, "epoch": 8.419825072886297, "grad_norm": 0.14574170112609863, "learning_rate": 1e-06, "loss": 0.0012, "step": 890 }, { "clip_ratio/high_max": 0.0021206787168921437, "clip_ratio/high_mean": 0.0007522479572799057, "clip_ratio/low_mean": 0.0005808741243527038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013331220834515989, "epoch": 8.429154518950437, "grad_norm": 0.18270836770534515, "learning_rate": 1e-06, "loss": -0.0241, "step": 891 }, { "clip_ratio/high_max": 0.002173107583075762, "clip_ratio/high_mean": 0.0007939054939924972, "clip_ratio/low_mean": 0.0007084902335918741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015023957421362866, "epoch": 8.438483965014576, "grad_norm": 0.18892639875411987, "learning_rate": 1e-06, "loss": -0.0351, "step": 892 }, { "clip_ratio/high_max": 0.0018916069348051678, "clip_ratio/high_mean": 0.0007560577414551517, "clip_ratio/low_mean": 0.0006990867241256637, "clip_ratio/low_min": 4.7718908717797603e-05, "clip_ratio/region_mean": 0.0014551444473909214, "epoch": 8.447813411078718, "grad_norm": 0.17127162218093872, "learning_rate": 1e-06, "loss": -0.0019, "step": 893 }, { "clip_ratio/high_max": 0.0019394887931412086, "clip_ratio/high_mean": 0.000847674060423742, "clip_ratio/low_mean": 0.0006340550826280378, "clip_ratio/low_min": 4.629034083336592e-05, "clip_ratio/region_mean": 0.0014817291594226845, "epoch": 8.457142857142857, "grad_norm": 0.16858655214309692, "learning_rate": 1e-06, "loss": -0.0441, "step": 894 }, { "clip_ratio/high_max": 0.0021445504535222426, "clip_ratio/high_mean": 0.0008681833041919162, "clip_ratio/low_mean": 0.000648071477371559, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015162547715590335, "epoch": 8.466472303206997, "grad_norm": 0.17232348024845123, "learning_rate": 1e-06, "loss": -0.0164, "step": 895 }, { "clip_ratio/high_max": 0.0020819171513721813, "clip_ratio/high_mean": 0.0008952906537160743, "clip_ratio/low_mean": 0.0006250512096812599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001520341873401776, "epoch": 8.475801749271136, "grad_norm": 0.1585269272327423, "learning_rate": 1e-06, "loss": -0.0615, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0631975446428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3992.0, "completions/mean_length": 784.4945068359375, "completions/mean_terminated_length": 561.0972900390625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 8.485131195335278, "frac_reward_zero_std": 0.6640625, "grad_norm": 0.15702396631240845, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 524027461.0, "reward": 0.6554129719734192, "reward_std": 0.1454915553331375, "rewards/simpleverify_reward/mean": 0.6554129719734192, "rewards/simpleverify_reward/std": 0.4752499759197235, "step": 897 }, { "clip_ratio/high_max": 0.0017127037353930064, "clip_ratio/high_mean": 0.0006659155742454459, "clip_ratio/low_mean": 0.0003944291456718929, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010603447153698653, "epoch": 8.494460641399417, "grad_norm": 0.1824866086244583, "learning_rate": 1e-06, "loss": -0.0155, "step": 898 }, { "clip_ratio/high_max": 0.0018186215093010105, "clip_ratio/high_mean": 0.0006872058020235272, "clip_ratio/low_mean": 0.0004633334619938978, "clip_ratio/low_min": 1.059501573763555e-05, "clip_ratio/region_mean": 0.0011505392685648985, "epoch": 8.503790087463557, "grad_norm": 0.17758487164974213, "learning_rate": 1e-06, "loss": -0.014, "step": 899 }, { "clip_ratio/high_max": 0.0018214531664852984, "clip_ratio/high_mean": 0.0006548125620611245, "clip_ratio/low_mean": 0.0004423463169587194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010971589126711478, "epoch": 8.513119533527696, "grad_norm": 0.16419927775859833, "learning_rate": 1e-06, "loss": -0.0188, "step": 900 }, { "clip_ratio/high_max": 0.0018851204295060597, "clip_ratio/high_mean": 0.0007843953408155357, "clip_ratio/low_mean": 0.00046884555649739923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012532408945844509, "epoch": 8.522448979591836, "grad_norm": 0.17884419858455658, "learning_rate": 1e-06, "loss": -0.0296, "step": 901 }, { "clip_ratio/high_max": 0.0016564240140723996, "clip_ratio/high_mean": 0.0007010768968029879, "clip_ratio/low_mean": 0.0005325651427483535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012336420404608361, "epoch": 8.531778425655977, "grad_norm": 0.1963464468717575, "learning_rate": 1e-06, "loss": -0.0309, "step": 902 }, { "clip_ratio/high_max": 0.0017919570818776265, "clip_ratio/high_mean": 0.0006994216510065598, "clip_ratio/low_mean": 0.0005371390207074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012365606671664864, "epoch": 8.541107871720117, "grad_norm": 0.17385783791542053, "learning_rate": 1e-06, "loss": -0.0241, "step": 903 }, { "clip_ratio/high_max": 0.002085795975290239, "clip_ratio/high_mean": 0.0007832251867512241, "clip_ratio/low_mean": 0.0005375321134124533, "clip_ratio/low_min": 1.986649658647366e-05, "clip_ratio/region_mean": 0.0013207572847022675, "epoch": 8.550437317784256, "grad_norm": 0.16436436772346497, "learning_rate": 1e-06, "loss": -0.0211, "step": 904 }, { "clip_ratio/high_max": 0.0018285778605786618, "clip_ratio/high_mean": 0.000702007595464238, "clip_ratio/low_mean": 0.0005761819220424513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001278189527511131, "epoch": 8.559766763848396, "grad_norm": 0.15666452050209045, "learning_rate": 1e-06, "loss": -0.001, "step": 905 }, { "clip_ratio/high_max": 0.00192765322572086, "clip_ratio/high_mean": 0.0008104655971692409, "clip_ratio/low_mean": 0.0005374170491450059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013478826367645524, "epoch": 8.569096209912537, "grad_norm": 0.3102234899997711, "learning_rate": 1e-06, "loss": -0.0412, "step": 906 }, { "clip_ratio/high_max": 0.002355557087867055, "clip_ratio/high_mean": 0.0008466208273603115, "clip_ratio/low_mean": 0.000528857633980806, "clip_ratio/low_min": 1.2768130545737222e-05, "clip_ratio/region_mean": 0.0013754784449702129, "epoch": 8.578425655976677, "grad_norm": 0.16835682094097137, "learning_rate": 1e-06, "loss": -0.0436, "step": 907 }, { "clip_ratio/high_max": 0.0022614882691414095, "clip_ratio/high_mean": 0.0009399640894116601, "clip_ratio/low_mean": 0.0006167400497361086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015567042064503767, "epoch": 8.587755102040816, "grad_norm": 0.16862867772579193, "learning_rate": 1e-06, "loss": -0.0693, "step": 908 }, { "clip_ratio/high_max": 0.0019341925108165015, "clip_ratio/high_mean": 0.0007301243349502329, "clip_ratio/low_mean": 0.0006944714114069939, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014245957718230784, "epoch": 8.597084548104956, "grad_norm": 0.15582375228405, "learning_rate": 1e-06, "loss": -0.0396, "step": 909 }, { "clip_ratio/high_max": 0.0022991478035692126, "clip_ratio/high_mean": 0.0009210256648657378, "clip_ratio/low_mean": 0.0007392180905299028, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016602437499386724, "epoch": 8.606413994169095, "grad_norm": 0.15749192237854004, "learning_rate": 1e-06, "loss": 0.0012, "step": 910 }, { "clip_ratio/high_max": 0.0019761041476158425, "clip_ratio/high_mean": 0.0008405353510170244, "clip_ratio/low_mean": 0.0006672517974948278, "clip_ratio/low_min": 1.1326567801006604e-05, "clip_ratio/region_mean": 0.001507787179434672, "epoch": 8.615743440233237, "grad_norm": 0.17922130227088928, "learning_rate": 1e-06, "loss": -0.0396, "step": 911 }, { "clip_ratio/high_max": 0.002170774791011354, "clip_ratio/high_mean": 0.0007785928087287175, "clip_ratio/low_mean": 0.0006922007632965688, "clip_ratio/low_min": 2.508025681891013e-05, "clip_ratio/region_mean": 0.0014707935952174012, "epoch": 8.625072886297376, "grad_norm": 0.21536077558994293, "learning_rate": 1e-06, "loss": 0.0204, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0635463169642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 770.6015014648438, "completions/mean_terminated_length": 544.9450073242188, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 8.634402332361516, "frac_reward_zero_std": 0.6791294813156128, "grad_norm": 0.1680370718240738, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 532789044.0, "reward": 0.665108859539032, "reward_std": 0.14071911573410034, "rewards/simpleverify_reward/mean": 0.6651087999343872, "rewards/simpleverify_reward/std": 0.4719688594341278, "step": 913 }, { "clip_ratio/high_max": 0.0019070979360549245, "clip_ratio/high_mean": 0.0007364368802882382, "clip_ratio/low_mean": 0.0004058307076775236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011422675743233413, "epoch": 8.643731778425655, "grad_norm": 0.14524978399276733, "learning_rate": 1e-06, "loss": -0.0451, "step": 914 }, { "clip_ratio/high_max": 0.002006776543566957, "clip_ratio/high_mean": 0.0007375664190476527, "clip_ratio/low_mean": 0.00040581627399660647, "clip_ratio/low_min": 1.2445240827219095e-05, "clip_ratio/region_mean": 0.0011433827021392062, "epoch": 8.653061224489797, "grad_norm": 0.20701570808887482, "learning_rate": 1e-06, "loss": -0.0613, "step": 915 }, { "clip_ratio/high_max": 0.001559811378683662, "clip_ratio/high_mean": 0.0006516214798466535, "clip_ratio/low_mean": 0.00045485796454158844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011064794525736943, "epoch": 8.662390670553936, "grad_norm": 0.13970234990119934, "learning_rate": 1e-06, "loss": -0.0238, "step": 916 }, { "clip_ratio/high_max": 0.0019094772324024234, "clip_ratio/high_mean": 0.0007069931762089254, "clip_ratio/low_mean": 0.000398945395318151, "clip_ratio/low_min": 1.1961722520936746e-05, "clip_ratio/region_mean": 0.00110593857607455, "epoch": 8.671720116618076, "grad_norm": 0.1489570140838623, "learning_rate": 1e-06, "loss": -0.0379, "step": 917 }, { "clip_ratio/high_max": 0.002058959933492588, "clip_ratio/high_mean": 0.0007507753889512969, "clip_ratio/low_mean": 0.0004883393048658036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012391146810841747, "epoch": 8.681049562682215, "grad_norm": 0.14852213859558105, "learning_rate": 1e-06, "loss": 0.0018, "step": 918 }, { "clip_ratio/high_max": 0.0019515337990014814, "clip_ratio/high_mean": 0.0007572898593934951, "clip_ratio/low_mean": 0.0005567767075262964, "clip_ratio/low_min": 1.3551604752137791e-05, "clip_ratio/region_mean": 0.0013140665578248445, "epoch": 8.690379008746355, "grad_norm": 0.17862091958522797, "learning_rate": 1e-06, "loss": -0.0052, "step": 919 }, { "clip_ratio/high_max": 0.0020004875877930317, "clip_ratio/high_mean": 0.000780080932599958, "clip_ratio/low_mean": 0.00048462493032275233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012647058501897845, "epoch": 8.699708454810496, "grad_norm": 0.1498589664697647, "learning_rate": 1e-06, "loss": -0.0678, "step": 920 }, { "clip_ratio/high_max": 0.00239990177215077, "clip_ratio/high_mean": 0.0008854263251123484, "clip_ratio/low_mean": 0.0004994056780560641, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013848320268152747, "epoch": 8.709037900874636, "grad_norm": 0.167356476187706, "learning_rate": 1e-06, "loss": -0.0733, "step": 921 }, { "clip_ratio/high_max": 0.0020114206345169805, "clip_ratio/high_mean": 0.000766438475693576, "clip_ratio/low_mean": 0.0006518863965538912, "clip_ratio/low_min": 1.7841850421973504e-05, "clip_ratio/region_mean": 0.0014183248931658454, "epoch": 8.718367346938775, "grad_norm": 0.17834888398647308, "learning_rate": 1e-06, "loss": -0.0119, "step": 922 }, { "clip_ratio/high_max": 0.0018703993191593327, "clip_ratio/high_mean": 0.0007124172534531681, "clip_ratio/low_mean": 0.0005930663123763225, "clip_ratio/low_min": 2.1972227841615677e-05, "clip_ratio/region_mean": 0.001305483587202616, "epoch": 8.727696793002915, "grad_norm": 0.16429275274276733, "learning_rate": 1e-06, "loss": 0.009, "step": 923 }, { "clip_ratio/high_max": 0.0018632500286912546, "clip_ratio/high_mean": 0.0007643872140761232, "clip_ratio/low_mean": 0.0006295983966992935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001393985603499459, "epoch": 8.737026239067056, "grad_norm": 0.1785808503627777, "learning_rate": 1e-06, "loss": -0.0245, "step": 924 }, { "clip_ratio/high_max": 0.0023469815714634024, "clip_ratio/high_mean": 0.0009259249509341316, "clip_ratio/low_mean": 0.0006262585884542204, "clip_ratio/low_min": 2.7914247766602784e-05, "clip_ratio/region_mean": 0.001552183530293405, "epoch": 8.746355685131196, "grad_norm": 0.16663958132266998, "learning_rate": 1e-06, "loss": -0.0315, "step": 925 }, { "clip_ratio/high_max": 0.002066862489300547, "clip_ratio/high_mean": 0.000700092703482369, "clip_ratio/low_mean": 0.0007456066232407466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001445699337637052, "epoch": 8.755685131195335, "grad_norm": 0.17953461408615112, "learning_rate": 1e-06, "loss": 0.0037, "step": 926 }, { "clip_ratio/high_max": 0.0019139971991535276, "clip_ratio/high_mean": 0.0007456122602889081, "clip_ratio/low_mean": 0.0007132939354050905, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014589061720471364, "epoch": 8.765014577259475, "grad_norm": 0.1519089639186859, "learning_rate": 1e-06, "loss": -0.0276, "step": 927 }, { "clip_ratio/high_max": 0.002132390014594421, "clip_ratio/high_mean": 0.0008495136980855023, "clip_ratio/low_mean": 0.0005568415281231864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014063551861909218, "epoch": 8.774344023323614, "grad_norm": 0.15491636097431183, "learning_rate": 1e-06, "loss": -0.0365, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.076171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 828.5704956054688, "completions/mean_terminated_length": 559.1629028320312, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 8.783673469387756, "frac_reward_zero_std": 0.6690848469734192, "grad_norm": 0.17512163519859314, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 541607086.0, "reward": 0.649483859539032, "reward_std": 0.14533281326293945, "rewards/simpleverify_reward/mean": 0.6494837999343872, "rewards/simpleverify_reward/std": 0.47714823484420776, "step": 929 }, { "clip_ratio/high_max": 0.0018744988738035318, "clip_ratio/high_mean": 0.0006789037579437718, "clip_ratio/low_mean": 0.00037645126121788053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001055355020071147, "epoch": 8.793002915451895, "grad_norm": 0.15747784078121185, "learning_rate": 1e-06, "loss": -0.0194, "step": 930 }, { "clip_ratio/high_max": 0.001532270613097353, "clip_ratio/high_mean": 0.0006502503811134375, "clip_ratio/low_mean": 0.00037305399200704414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010233043467451353, "epoch": 8.802332361516035, "grad_norm": 0.16202802956104279, "learning_rate": 1e-06, "loss": -0.0355, "step": 931 }, { "clip_ratio/high_max": 0.001660119101870805, "clip_ratio/high_mean": 0.0006567806103703333, "clip_ratio/low_mean": 0.0003731026349669264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010298832385160495, "epoch": 8.811661807580174, "grad_norm": 0.16236986219882965, "learning_rate": 1e-06, "loss": -0.0455, "step": 932 }, { "clip_ratio/high_max": 0.0020801092905458063, "clip_ratio/high_mean": 0.0007086211508067208, "clip_ratio/low_mean": 0.000534320218321227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012429413800418843, "epoch": 8.820991253644316, "grad_norm": 0.204059436917305, "learning_rate": 1e-06, "loss": -0.0529, "step": 933 }, { "clip_ratio/high_max": 0.001685560757323401, "clip_ratio/high_mean": 0.000729823575966293, "clip_ratio/low_mean": 0.0004601898731380061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011900134340976365, "epoch": 8.830320699708455, "grad_norm": 0.16973228752613068, "learning_rate": 1e-06, "loss": -0.0703, "step": 934 }, { "clip_ratio/high_max": 0.002023288507189136, "clip_ratio/high_mean": 0.0008279870362457586, "clip_ratio/low_mean": 0.0006244172982405871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001452404358133208, "epoch": 8.839650145772595, "grad_norm": 0.3031490743160248, "learning_rate": 1e-06, "loss": -0.0219, "step": 935 }, { "clip_ratio/high_max": 0.0019443521741777658, "clip_ratio/high_mean": 0.0007558971628895961, "clip_ratio/low_mean": 0.0005896811480852193, "clip_ratio/low_min": 1.1535622434166726e-05, "clip_ratio/region_mean": 0.0013455783046083525, "epoch": 8.848979591836734, "grad_norm": 0.19068634510040283, "learning_rate": 1e-06, "loss": -0.0439, "step": 936 }, { "clip_ratio/high_max": 0.0017182166702696122, "clip_ratio/high_mean": 0.000700879765645368, "clip_ratio/low_mean": 0.0005648434243994416, "clip_ratio/low_min": 1.4973646102589555e-05, "clip_ratio/region_mean": 0.001265723185497336, "epoch": 8.858309037900874, "grad_norm": 0.1448352336883545, "learning_rate": 1e-06, "loss": -0.0472, "step": 937 }, { "clip_ratio/high_max": 0.00200663670693757, "clip_ratio/high_mean": 0.0007156682822824223, "clip_ratio/low_mean": 0.0006558200320796459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013714883061766159, "epoch": 8.867638483965015, "grad_norm": 0.18196900188922882, "learning_rate": 1e-06, "loss": -0.0194, "step": 938 }, { "clip_ratio/high_max": 0.0019590530573623255, "clip_ratio/high_mean": 0.0007388846497633494, "clip_ratio/low_mean": 0.0005984812296446762, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013373658948694356, "epoch": 8.876967930029155, "grad_norm": 0.1614876538515091, "learning_rate": 1e-06, "loss": -0.0255, "step": 939 }, { "clip_ratio/high_max": 0.0019729885170818307, "clip_ratio/high_mean": 0.0007681966617383296, "clip_ratio/low_mean": 0.0006934219964023214, "clip_ratio/low_min": 1.0437542186991777e-05, "clip_ratio/region_mean": 0.0014616186817875132, "epoch": 8.886297376093294, "grad_norm": 0.14563900232315063, "learning_rate": 1e-06, "loss": -0.0168, "step": 940 }, { "clip_ratio/high_max": 0.0015420476483996026, "clip_ratio/high_mean": 0.0006500305971712805, "clip_ratio/low_mean": 0.0007192948387455544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013693254368263297, "epoch": 8.895626822157434, "grad_norm": 0.20694388449192047, "learning_rate": 1e-06, "loss": 0.006, "step": 941 }, { "clip_ratio/high_max": 0.002284628222696483, "clip_ratio/high_mean": 0.0008706191438250244, "clip_ratio/low_mean": 0.0006928963130121701, "clip_ratio/low_min": 2.76121045317268e-05, "clip_ratio/region_mean": 0.001563515434099827, "epoch": 9.00932944606414, "grad_norm": 0.16881835460662842, "learning_rate": 1e-06, "loss": -0.0036, "step": 942 }, { "clip_ratio/high_max": 0.002443144090648275, "clip_ratio/high_mean": 0.0009424085401406046, "clip_ratio/low_mean": 0.0005623180331895128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015047265769680962, "epoch": 9.018658892128279, "grad_norm": 0.1989780068397522, "learning_rate": 1e-06, "loss": -0.046, "step": 943 }, { "clip_ratio/high_max": 0.0020269513697712682, "clip_ratio/high_mean": 0.0008147772132360842, "clip_ratio/low_mean": 0.00072794626794348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015427234793605749, "epoch": 9.02798833819242, "grad_norm": 0.18899641931056976, "learning_rate": 1e-06, "loss": -0.0537, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0716378348214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 815.3822021484375, "completions/mean_terminated_length": 562.2305908203125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 9.03731778425656, "frac_reward_zero_std": 0.6869419813156128, "grad_norm": 0.17971986532211304, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 550543197.0, "reward": 0.679757297039032, "reward_std": 0.1369597315788269, "rewards/simpleverify_reward/mean": 0.6797572374343872, "rewards/simpleverify_reward/std": 0.46658602356910706, "step": 945 }, { "clip_ratio/high_max": 0.0016547377708775457, "clip_ratio/high_mean": 0.0006388458832589095, "clip_ratio/low_mean": 0.00042840563219215255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001067251509084599, "epoch": 9.0466472303207, "grad_norm": 0.15994508564472198, "learning_rate": 1e-06, "loss": 0.0001, "step": 946 }, { "clip_ratio/high_max": 0.002197186022385722, "clip_ratio/high_mean": 0.0006978149212955032, "clip_ratio/low_mean": 0.0004688109470407653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011666258578770794, "epoch": 9.055976676384839, "grad_norm": 0.16993102431297302, "learning_rate": 1e-06, "loss": -0.0508, "step": 947 }, { "clip_ratio/high_max": 0.0014584888049284928, "clip_ratio/high_mean": 0.0005705248222511727, "clip_ratio/low_mean": 0.00047105711473705014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010415819197078235, "epoch": 9.06530612244898, "grad_norm": 0.18753620982170105, "learning_rate": 1e-06, "loss": 0.0019, "step": 948 }, { "clip_ratio/high_max": 0.0017163301235996187, "clip_ratio/high_mean": 0.0006941046358406311, "clip_ratio/low_mean": 0.00043076861311419634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011248732662352268, "epoch": 9.07463556851312, "grad_norm": 0.16024427115917206, "learning_rate": 1e-06, "loss": -0.0615, "step": 949 }, { "clip_ratio/high_max": 0.0016436954319942743, "clip_ratio/high_mean": 0.0006430084422390792, "clip_ratio/low_mean": 0.0004635851282728254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001106593572330894, "epoch": 9.08396501457726, "grad_norm": 0.32707831263542175, "learning_rate": 1e-06, "loss": -0.0215, "step": 950 }, { "clip_ratio/high_max": 0.0021122505895618815, "clip_ratio/high_mean": 0.0008647061749798013, "clip_ratio/low_mean": 0.0003538965206644207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012186027015559375, "epoch": 9.093294460641399, "grad_norm": 0.2025754749774933, "learning_rate": 1e-06, "loss": -0.0663, "step": 951 }, { "clip_ratio/high_max": 0.0017749570652085822, "clip_ratio/high_mean": 0.0007212886430352228, "clip_ratio/low_mean": 0.0003576543231247342, "clip_ratio/low_min": 1.755125049385242e-05, "clip_ratio/region_mean": 0.0010789429416036, "epoch": 9.102623906705539, "grad_norm": 0.19464510679244995, "learning_rate": 1e-06, "loss": -0.048, "step": 952 }, { "clip_ratio/high_max": 0.0019189094819012098, "clip_ratio/high_mean": 0.0007605415521538816, "clip_ratio/low_mean": 0.0006120636226114584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013726051547564566, "epoch": 9.11195335276968, "grad_norm": 0.17800183594226837, "learning_rate": 1e-06, "loss": 0.0094, "step": 953 }, { "clip_ratio/high_max": 0.0024833065399434417, "clip_ratio/high_mean": 0.0008433235270786099, "clip_ratio/low_mean": 0.0005803182602903689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014236417737265583, "epoch": 9.12128279883382, "grad_norm": 0.1607794612646103, "learning_rate": 1e-06, "loss": -0.0414, "step": 954 }, { "clip_ratio/high_max": 0.0018194566109741572, "clip_ratio/high_mean": 0.0006992237304075388, "clip_ratio/low_mean": 0.000578798779315548, "clip_ratio/low_min": 1.2040069123031572e-05, "clip_ratio/region_mean": 0.0012780225042661186, "epoch": 9.130612244897959, "grad_norm": 0.1712830364704132, "learning_rate": 1e-06, "loss": -0.0331, "step": 955 }, { "clip_ratio/high_max": 0.002423829289909918, "clip_ratio/high_mean": 0.0009518478291283827, "clip_ratio/low_mean": 0.0006456386845457018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015974865418684203, "epoch": 9.139941690962099, "grad_norm": 0.17182780802249908, "learning_rate": 1e-06, "loss": -0.074, "step": 956 }, { "clip_ratio/high_max": 0.0020721180153486785, "clip_ratio/high_mean": 0.0007712907845416339, "clip_ratio/low_mean": 0.0006145830129753449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013858737984264735, "epoch": 9.14927113702624, "grad_norm": 0.12604933977127075, "learning_rate": 1e-06, "loss": -0.0339, "step": 957 }, { "clip_ratio/high_max": 0.001997025749005843, "clip_ratio/high_mean": 0.0007638560691702878, "clip_ratio/low_mean": 0.0006812817073296173, "clip_ratio/low_min": 3.8739668525522575e-05, "clip_ratio/region_mean": 0.0014451377865043469, "epoch": 9.15860058309038, "grad_norm": 0.3209870159626007, "learning_rate": 1e-06, "loss": 0.0268, "step": 958 }, { "clip_ratio/high_max": 0.0019150478510709945, "clip_ratio/high_mean": 0.000751612737076357, "clip_ratio/low_mean": 0.00070366078580264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014552735156030394, "epoch": 9.167930029154519, "grad_norm": 0.14949150383472443, "learning_rate": 1e-06, "loss": -0.014, "step": 959 }, { "clip_ratio/high_max": 0.002251960217108717, "clip_ratio/high_mean": 0.0008433314915237133, "clip_ratio/low_mean": 0.0006796271645725938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001522958653367823, "epoch": 9.177259475218658, "grad_norm": 0.3394315242767334, "learning_rate": 1e-06, "loss": -0.0295, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0666155133928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 777.9196166992188, "completions/mean_terminated_length": 541.108642578125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 9.186588921282798, "frac_reward_zero_std": 0.7053571939468384, "grad_norm": 0.17634595930576324, "learning_rate": 1e-06, "loss": -0.053, "num_tokens": 559178036.0, "reward": 0.6661551594734192, "reward_std": 0.12901180982589722, "rewards/simpleverify_reward/mean": 0.6661551594734192, "rewards/simpleverify_reward/std": 0.4716014862060547, "step": 961 }, { "clip_ratio/high_max": 0.0019424356505624019, "clip_ratio/high_mean": 0.0007141067617340013, "clip_ratio/low_mean": 0.00035391427900322014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010680210289137904, "epoch": 9.19591836734694, "grad_norm": 0.1551034152507782, "learning_rate": 1e-06, "loss": -0.0251, "step": 962 }, { "clip_ratio/high_max": 0.0017862461390905082, "clip_ratio/high_mean": 0.0006733711325068725, "clip_ratio/low_mean": 0.000396619845105306, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010699909798859153, "epoch": 9.205247813411079, "grad_norm": 0.1617315709590912, "learning_rate": 1e-06, "loss": -0.0281, "step": 963 }, { "clip_ratio/high_max": 0.0015928984466881957, "clip_ratio/high_mean": 0.0005695428262697533, "clip_ratio/low_mean": 0.00045019017125014216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010197330011578742, "epoch": 9.214577259475218, "grad_norm": 0.1791970282793045, "learning_rate": 1e-06, "loss": 0.0004, "step": 964 }, { "clip_ratio/high_max": 0.001995206344872713, "clip_ratio/high_mean": 0.0007175151913543232, "clip_ratio/low_mean": 0.0004169379440099874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011344531230861321, "epoch": 9.223906705539358, "grad_norm": 0.15608426928520203, "learning_rate": 1e-06, "loss": -0.0288, "step": 965 }, { "clip_ratio/high_max": 0.0020151612334302627, "clip_ratio/high_mean": 0.0007071384061418939, "clip_ratio/low_mean": 0.00044856497515866067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00115570336492965, "epoch": 9.2332361516035, "grad_norm": 0.15818676352500916, "learning_rate": 1e-06, "loss": -0.0428, "step": 966 }, { "clip_ratio/high_max": 0.0017879697188618593, "clip_ratio/high_mean": 0.0006328865038085496, "clip_ratio/low_mean": 0.0003989888566593436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010318753593310248, "epoch": 9.242565597667639, "grad_norm": 0.2347203493118286, "learning_rate": 1e-06, "loss": -0.0406, "step": 967 }, { "clip_ratio/high_max": 0.00150251177183236, "clip_ratio/high_mean": 0.0006398935638571857, "clip_ratio/low_mean": 0.0005414982733782381, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011813918135885615, "epoch": 9.251895043731778, "grad_norm": 0.1609174758195877, "learning_rate": 1e-06, "loss": -0.0203, "step": 968 }, { "clip_ratio/high_max": 0.0019447939484962262, "clip_ratio/high_mean": 0.0006671911050943891, "clip_ratio/low_mean": 0.00045700702685280703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011241981301282067, "epoch": 9.261224489795918, "grad_norm": 0.18567855656147003, "learning_rate": 1e-06, "loss": -0.0022, "step": 969 }, { "clip_ratio/high_max": 0.0020999932057748083, "clip_ratio/high_mean": 0.0007863326627557399, "clip_ratio/low_mean": 0.0005307507153702318, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013170833626645617, "epoch": 9.270553935860057, "grad_norm": 0.17362654209136963, "learning_rate": 1e-06, "loss": -0.0359, "step": 970 }, { "clip_ratio/high_max": 0.0020038936236232985, "clip_ratio/high_mean": 0.0007963560365169542, "clip_ratio/low_mean": 0.0005905059806536883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013868620444554836, "epoch": 9.279883381924199, "grad_norm": 0.1734657883644104, "learning_rate": 1e-06, "loss": -0.0307, "step": 971 }, { "clip_ratio/high_max": 0.0023402976694342215, "clip_ratio/high_mean": 0.0009069713905773824, "clip_ratio/low_mean": 0.0005829662295582239, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014899376692483202, "epoch": 9.289212827988338, "grad_norm": 0.17477284371852875, "learning_rate": 1e-06, "loss": -0.0429, "step": 972 }, { "clip_ratio/high_max": 0.0022828787805337925, "clip_ratio/high_mean": 0.0008298699594888603, "clip_ratio/low_mean": 0.0006838046861048497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015136746660573408, "epoch": 9.298542274052478, "grad_norm": 0.35597121715545654, "learning_rate": 1e-06, "loss": -0.0107, "step": 973 }, { "clip_ratio/high_max": 0.0021340677776606753, "clip_ratio/high_mean": 0.0007832266346667893, "clip_ratio/low_mean": 0.0007655432400497375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015487698547076434, "epoch": 9.307871720116617, "grad_norm": 0.1649247109889984, "learning_rate": 1e-06, "loss": -0.048, "step": 974 }, { "clip_ratio/high_max": 0.0024610555337858386, "clip_ratio/high_mean": 0.0008840714253892656, "clip_ratio/low_mean": 0.0008129857287713094, "clip_ratio/low_min": 6.47332999506034e-05, "clip_ratio/region_mean": 0.0016970571487036068, "epoch": 9.317201166180759, "grad_norm": 0.1683146208524704, "learning_rate": 1e-06, "loss": 0.0022, "step": 975 }, { "clip_ratio/high_max": 0.00224022598558804, "clip_ratio/high_mean": 0.0008061329663178185, "clip_ratio/low_mean": 0.0007781845652061747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015843175278860144, "epoch": 9.326530612244898, "grad_norm": 0.1643582284450531, "learning_rate": 1e-06, "loss": 0.0159, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0673130580357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 785.1867065429688, "completions/mean_terminated_length": 546.2415771484375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 9.335860058309038, "frac_reward_zero_std": 0.6886160969734192, "grad_norm": 0.17985017597675323, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 567906432.0, "reward": 0.6727120876312256, "reward_std": 0.13589756190776825, "rewards/simpleverify_reward/mean": 0.6727120280265808, "rewards/simpleverify_reward/std": 0.4692396819591522, "step": 977 }, { "clip_ratio/high_max": 0.0017952384405361954, "clip_ratio/high_mean": 0.0007401874463539571, "clip_ratio/low_mean": 0.0003304854280941072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010706728735385695, "epoch": 9.345189504373177, "grad_norm": 0.18818628787994385, "learning_rate": 1e-06, "loss": -0.0643, "step": 978 }, { "clip_ratio/high_max": 0.0017497503213235177, "clip_ratio/high_mean": 0.0006314648944680812, "clip_ratio/low_mean": 0.00038445802238129545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010159229132113978, "epoch": 9.354518950437317, "grad_norm": 0.13973994553089142, "learning_rate": 1e-06, "loss": -0.052, "step": 979 }, { "clip_ratio/high_max": 0.0019055005723203067, "clip_ratio/high_mean": 0.0007365245328401215, "clip_ratio/low_mean": 0.0004375678681753925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011740924019250087, "epoch": 9.363848396501458, "grad_norm": 0.16950105130672455, "learning_rate": 1e-06, "loss": -0.0429, "step": 980 }, { "clip_ratio/high_max": 0.0017405969701940194, "clip_ratio/high_mean": 0.0006398640380211873, "clip_ratio/low_mean": 0.0005591398576143547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011990038965450367, "epoch": 9.373177842565598, "grad_norm": 0.17156977951526642, "learning_rate": 1e-06, "loss": -0.0178, "step": 981 }, { "clip_ratio/high_max": 0.001719835421681637, "clip_ratio/high_mean": 0.0006464966136263683, "clip_ratio/low_mean": 0.0006304832804744365, "clip_ratio/low_min": 1.9177661670255475e-05, "clip_ratio/region_mean": 0.001276979917747667, "epoch": 9.382507288629737, "grad_norm": 0.4001530408859253, "learning_rate": 1e-06, "loss": 0.0153, "step": 982 }, { "clip_ratio/high_max": 0.0017122846220445354, "clip_ratio/high_mean": 0.0006557821370734018, "clip_ratio/low_mean": 0.000495652050631179, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011514341967995279, "epoch": 9.391836734693877, "grad_norm": 0.14370955526828766, "learning_rate": 1e-06, "loss": -0.0214, "step": 983 }, { "clip_ratio/high_max": 0.0018947352036775555, "clip_ratio/high_mean": 0.000654839681374142, "clip_ratio/low_mean": 0.0005794581174995983, "clip_ratio/low_min": 2.734033296292182e-05, "clip_ratio/region_mean": 0.0012342978006927297, "epoch": 9.401166180758018, "grad_norm": 0.15260306000709534, "learning_rate": 1e-06, "loss": -0.0223, "step": 984 }, { "clip_ratio/high_max": 0.001796559645299567, "clip_ratio/high_mean": 0.0006631135365751106, "clip_ratio/low_mean": 0.0005885332470825233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00125164680321177, "epoch": 9.410495626822158, "grad_norm": 0.20437265932559967, "learning_rate": 1e-06, "loss": 0.0075, "step": 985 }, { "clip_ratio/high_max": 0.001923834344779607, "clip_ratio/high_mean": 0.0006825443342677318, "clip_ratio/low_mean": 0.0004684685418396839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001151012857008027, "epoch": 9.419825072886297, "grad_norm": 0.15974338352680206, "learning_rate": 1e-06, "loss": -0.0188, "step": 986 }, { "clip_ratio/high_max": 0.0017378541597281583, "clip_ratio/high_mean": 0.0007303457477974007, "clip_ratio/low_mean": 0.00045269524434843333, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011830410003312863, "epoch": 9.429154518950437, "grad_norm": 0.1492188572883606, "learning_rate": 1e-06, "loss": -0.0747, "step": 987 }, { "clip_ratio/high_max": 0.002127880994521547, "clip_ratio/high_mean": 0.0007518930033256765, "clip_ratio/low_mean": 0.000581014199042329, "clip_ratio/low_min": 1.1394712601031642e-05, "clip_ratio/region_mean": 0.001332907206233358, "epoch": 9.438483965014576, "grad_norm": 0.2083882838487625, "learning_rate": 1e-06, "loss": -0.0107, "step": 988 }, { "clip_ratio/high_max": 0.001958842729436583, "clip_ratio/high_mean": 0.0007436704318024567, "clip_ratio/low_mean": 0.0006071216166674276, "clip_ratio/low_min": 1.44877139973687e-05, "clip_ratio/region_mean": 0.0013507920502888737, "epoch": 9.447813411078718, "grad_norm": 0.15471157431602478, "learning_rate": 1e-06, "loss": -0.0211, "step": 989 }, { "clip_ratio/high_max": 0.0018755052296910435, "clip_ratio/high_mean": 0.0007591109524582862, "clip_ratio/low_mean": 0.000567406708796625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013265176567074377, "epoch": 9.457142857142857, "grad_norm": 0.16779926419258118, "learning_rate": 1e-06, "loss": -0.0232, "step": 990 }, { "clip_ratio/high_max": 0.002193683285440784, "clip_ratio/high_mean": 0.0009521914489596384, "clip_ratio/low_mean": 0.0007022256249911152, "clip_ratio/low_min": 7.115795960999094e-05, "clip_ratio/region_mean": 0.0016544170648558065, "epoch": 9.466472303206997, "grad_norm": 0.17329081892967224, "learning_rate": 1e-06, "loss": -0.0809, "step": 991 }, { "clip_ratio/high_max": 0.002083881219732575, "clip_ratio/high_mean": 0.0008509308045177022, "clip_ratio/low_mean": 0.0006051282871339936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014560591080226004, "epoch": 9.475801749271136, "grad_norm": 0.14656265079975128, "learning_rate": 1e-06, "loss": -0.073, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0780552455357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3941.0, "completions/mean_length": 832.4307250976562, "completions/mean_terminated_length": 556.1248779296875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 9.485131195335278, "frac_reward_zero_std": 0.6752232313156128, "grad_norm": 0.1949101835489273, "learning_rate": 1e-06, "loss": -0.0613, "num_tokens": 576708574.0, "reward": 0.6619699001312256, "reward_std": 0.140668123960495, "rewards/simpleverify_reward/mean": 0.6619698405265808, "rewards/simpleverify_reward/std": 0.47305533289909363, "step": 993 }, { "clip_ratio/high_max": 0.0014786214433115674, "clip_ratio/high_mean": 0.0006062898582968046, "clip_ratio/low_mean": 0.00037242952657834394, "clip_ratio/low_min": 1.4852661479380913e-05, "clip_ratio/region_mean": 0.0009787193848751485, "epoch": 9.494460641399417, "grad_norm": 0.3336639404296875, "learning_rate": 1e-06, "loss": -0.048, "step": 994 }, { "clip_ratio/high_max": 0.0019038986065424979, "clip_ratio/high_mean": 0.000693733518346562, "clip_ratio/low_mean": 0.00041415673535993847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011078902352892328, "epoch": 9.503790087463557, "grad_norm": 0.18062427639961243, "learning_rate": 1e-06, "loss": -0.0297, "step": 995 }, { "clip_ratio/high_max": 0.0021283951718942262, "clip_ratio/high_mean": 0.0008517579972249223, "clip_ratio/low_mean": 0.000504132322021178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001355890322884079, "epoch": 9.513119533527696, "grad_norm": 0.19807574152946472, "learning_rate": 1e-06, "loss": -0.0637, "step": 996 }, { "clip_ratio/high_max": 0.001965635223314166, "clip_ratio/high_mean": 0.0007401467100862646, "clip_ratio/low_mean": 0.0004809389783986262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012210857057652902, "epoch": 9.522448979591836, "grad_norm": 0.16812844574451447, "learning_rate": 1e-06, "loss": -0.0655, "step": 997 }, { "clip_ratio/high_max": 0.002038771242951043, "clip_ratio/high_mean": 0.0006594263431907166, "clip_ratio/low_mean": 0.0004533376800281985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011127639991173055, "epoch": 9.531778425655977, "grad_norm": 0.15830011665821075, "learning_rate": 1e-06, "loss": -0.0391, "step": 998 }, { "clip_ratio/high_max": 0.0015672207409807015, "clip_ratio/high_mean": 0.0006129558673819702, "clip_ratio/low_mean": 0.0005369484215407283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011499042811919935, "epoch": 9.541107871720117, "grad_norm": 0.14955942332744598, "learning_rate": 1e-06, "loss": -0.0325, "step": 999 }, { "clip_ratio/high_max": 0.0018050437538477127, "clip_ratio/high_mean": 0.0008305767223646399, "clip_ratio/low_mean": 0.0005203283035370987, "clip_ratio/low_min": 2.8921795092173852e-05, "clip_ratio/region_mean": 0.0013509050259017386, "epoch": 9.550437317784256, "grad_norm": 0.40459978580474854, "learning_rate": 1e-06, "loss": -0.0388, "step": 1000 }, { "clip_ratio/high_max": 0.00209836992144119, "clip_ratio/high_mean": 0.0007945113475216203, "clip_ratio/low_mean": 0.0006391849237843417, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014336962631205097, "epoch": 9.559766763848396, "grad_norm": 0.23577359318733215, "learning_rate": 1e-06, "loss": 0.0169, "step": 1001 }, { "clip_ratio/high_max": 0.0017463684380345512, "clip_ratio/high_mean": 0.0007071204545354703, "clip_ratio/low_mean": 0.0005485423835125403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012556628498714417, "epoch": 9.569096209912537, "grad_norm": 0.1798313856124878, "learning_rate": 1e-06, "loss": -0.0092, "step": 1002 }, { "clip_ratio/high_max": 0.002046927096671425, "clip_ratio/high_mean": 0.0008070457770372741, "clip_ratio/low_mean": 0.000616450412053382, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014234962036425713, "epoch": 9.578425655976677, "grad_norm": 0.17761729657649994, "learning_rate": 1e-06, "loss": -0.0755, "step": 1003 }, { "clip_ratio/high_max": 0.0020198661150061525, "clip_ratio/high_mean": 0.0007408254314213991, "clip_ratio/low_mean": 0.0006405091717169853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013813346231472678, "epoch": 9.587755102040816, "grad_norm": 0.17207640409469604, "learning_rate": 1e-06, "loss": -0.017, "step": 1004 }, { "clip_ratio/high_max": 0.0020958116147085093, "clip_ratio/high_mean": 0.0007528918595198775, "clip_ratio/low_mean": 0.0006139049910416361, "clip_ratio/low_min": 1.1238986189709976e-05, "clip_ratio/region_mean": 0.001366796812362736, "epoch": 9.597084548104956, "grad_norm": 0.1915488839149475, "learning_rate": 1e-06, "loss": -0.0234, "step": 1005 }, { "clip_ratio/high_max": 0.002128433443431277, "clip_ratio/high_mean": 0.0007843407147447579, "clip_ratio/low_mean": 0.0006277472439251142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014120879641268402, "epoch": 9.606413994169095, "grad_norm": 0.18603254854679108, "learning_rate": 1e-06, "loss": -0.0136, "step": 1006 }, { "clip_ratio/high_max": 0.002180125353334006, "clip_ratio/high_mean": 0.0008455338302155724, "clip_ratio/low_mean": 0.0006228045963325712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014683384615636896, "epoch": 9.615743440233237, "grad_norm": 0.15541522204875946, "learning_rate": 1e-06, "loss": -0.0541, "step": 1007 }, { "clip_ratio/high_max": 0.0019881898842868395, "clip_ratio/high_mean": 0.0007551898142992286, "clip_ratio/low_mean": 0.0008706994967724313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016258892828773241, "epoch": 9.625072886297376, "grad_norm": 0.16928693652153015, "learning_rate": 1e-06, "loss": 0.0226, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0772879464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 821.5526733398438, "completions/mean_terminated_length": 547.2793579101562, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 9.634402332361516, "frac_reward_zero_std": 0.6774553656578064, "grad_norm": 0.19711339473724365, "learning_rate": 1e-06, "loss": -0.0233, "num_tokens": 585374929.0, "reward": 0.6644810438156128, "reward_std": 0.141179621219635, "rewards/simpleverify_reward/mean": 0.6644810438156128, "rewards/simpleverify_reward/std": 0.47218799591064453, "step": 1009 }, { "clip_ratio/high_max": 0.0018099943044944666, "clip_ratio/high_mean": 0.0005804296870337566, "clip_ratio/low_mean": 0.0003625873437158589, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009430170375708258, "epoch": 9.643731778425655, "grad_norm": 0.37854063510894775, "learning_rate": 1e-06, "loss": -0.0253, "step": 1010 }, { "clip_ratio/high_max": 0.0017566644819453359, "clip_ratio/high_mean": 0.0007027833626125357, "clip_ratio/low_mean": 0.00037291476542122837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001075698149179516, "epoch": 9.653061224489797, "grad_norm": 0.1791907548904419, "learning_rate": 1e-06, "loss": -0.0458, "step": 1011 }, { "clip_ratio/high_max": 0.0017551482869748725, "clip_ratio/high_mean": 0.0006800559667681227, "clip_ratio/low_mean": 0.0004403259690661798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011203819522052072, "epoch": 9.662390670553936, "grad_norm": 0.18300682306289673, "learning_rate": 1e-06, "loss": -0.0343, "step": 1012 }, { "clip_ratio/high_max": 0.001907309408125002, "clip_ratio/high_mean": 0.0006715751460433239, "clip_ratio/low_mean": 0.0005111733398734941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011827485031972174, "epoch": 9.671720116618076, "grad_norm": 0.1788371205329895, "learning_rate": 1e-06, "loss": -0.0359, "step": 1013 }, { "clip_ratio/high_max": 0.0021542824542848393, "clip_ratio/high_mean": 0.0007635960300831357, "clip_ratio/low_mean": 0.0004942482448768715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012578442838275805, "epoch": 9.681049562682215, "grad_norm": 0.16179868578910828, "learning_rate": 1e-06, "loss": -0.0397, "step": 1014 }, { "clip_ratio/high_max": 0.001814277911762474, "clip_ratio/high_mean": 0.0006887316394568188, "clip_ratio/low_mean": 0.00044606711526284926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001134798736529774, "epoch": 9.690379008746355, "grad_norm": 0.17980802059173584, "learning_rate": 1e-06, "loss": -0.0252, "step": 1015 }, { "clip_ratio/high_max": 0.001947916727658594, "clip_ratio/high_mean": 0.0007896624774730299, "clip_ratio/low_mean": 0.0005166765458852751, "clip_ratio/low_min": 1.7964932339964435e-05, "clip_ratio/region_mean": 0.0013063389815215487, "epoch": 9.699708454810496, "grad_norm": 0.2003168761730194, "learning_rate": 1e-06, "loss": -0.036, "step": 1016 }, { "clip_ratio/high_max": 0.001921015078551136, "clip_ratio/high_mean": 0.0007560367630503606, "clip_ratio/low_mean": 0.0006100463051552651, "clip_ratio/low_min": 1.6846361177158542e-05, "clip_ratio/region_mean": 0.0013660831027664244, "epoch": 9.709037900874636, "grad_norm": 0.20250247418880463, "learning_rate": 1e-06, "loss": -0.0303, "step": 1017 }, { "clip_ratio/high_max": 0.002160037838621065, "clip_ratio/high_mean": 0.0008639184998173732, "clip_ratio/low_mean": 0.000594328347233386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014582468611479271, "epoch": 9.718367346938775, "grad_norm": 0.16375523805618286, "learning_rate": 1e-06, "loss": -0.0212, "step": 1018 }, { "clip_ratio/high_max": 0.001982917216082569, "clip_ratio/high_mean": 0.0007672537394682877, "clip_ratio/low_mean": 0.0005821316372021101, "clip_ratio/low_min": 1.396336028847145e-05, "clip_ratio/region_mean": 0.0013493853803083766, "epoch": 9.727696793002915, "grad_norm": 0.16571122407913208, "learning_rate": 1e-06, "loss": -0.0212, "step": 1019 }, { "clip_ratio/high_max": 0.002405844108579913, "clip_ratio/high_mean": 0.0008451171543129021, "clip_ratio/low_mean": 0.0006225280994840432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014676452592539135, "epoch": 9.737026239067056, "grad_norm": 0.16750848293304443, "learning_rate": 1e-06, "loss": -0.03, "step": 1020 }, { "clip_ratio/high_max": 0.0019815667874354403, "clip_ratio/high_mean": 0.0008777757084317273, "clip_ratio/low_mean": 0.0005531903334485833, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014309660473372787, "epoch": 9.746355685131196, "grad_norm": 0.16585005819797516, "learning_rate": 1e-06, "loss": -0.0716, "step": 1021 }, { "clip_ratio/high_max": 0.0020915654677082784, "clip_ratio/high_mean": 0.0008779312029219, "clip_ratio/low_mean": 0.0005948483549218508, "clip_ratio/low_min": 1.1516491213114932e-05, "clip_ratio/region_mean": 0.001472779560572235, "epoch": 9.755685131195335, "grad_norm": 0.148276686668396, "learning_rate": 1e-06, "loss": -0.0507, "step": 1022 }, { "clip_ratio/high_max": 0.0020321079973655287, "clip_ratio/high_mean": 0.0007193689107225509, "clip_ratio/low_mean": 0.0007310923083423404, "clip_ratio/low_min": 2.2465852453024127e-05, "clip_ratio/region_mean": 0.0014504612372547854, "epoch": 9.765014577259475, "grad_norm": 0.1604248285293579, "learning_rate": 1e-06, "loss": -0.0221, "step": 1023 }, { "clip_ratio/high_max": 0.002096129952406045, "clip_ratio/high_mean": 0.0008852497794578085, "clip_ratio/low_mean": 0.0007964958040247438, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016817455980344675, "epoch": 9.774344023323614, "grad_norm": 0.21424144506454468, "learning_rate": 1e-06, "loss": -0.0333, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08056640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 832.1668090820312, "completions/mean_terminated_length": 546.1697387695312, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 9.783673469387756, "frac_reward_zero_std": 0.6858259439468384, "grad_norm": 0.18690288066864014, "learning_rate": 1e-06, "loss": -0.0666, "num_tokens": 593971392.0, "reward": 0.6559709906578064, "reward_std": 0.13854390382766724, "rewards/simpleverify_reward/mean": 0.6559709906578064, "rewards/simpleverify_reward/std": 0.475067138671875, "step": 1025 }, { "clip_ratio/high_max": 0.001871199463494122, "clip_ratio/high_mean": 0.0006868391883472214, "clip_ratio/low_mean": 0.0003740343731806206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010608735519781476, "epoch": 9.793002915451895, "grad_norm": 0.21212653815746307, "learning_rate": 1e-06, "loss": -0.0585, "step": 1026 }, { "clip_ratio/high_max": 0.0017325400622212328, "clip_ratio/high_mean": 0.000619778085820144, "clip_ratio/low_mean": 0.0005686905124093755, "clip_ratio/low_min": 1.2755102034134325e-05, "clip_ratio/region_mean": 0.0011884686136909295, "epoch": 9.802332361516035, "grad_norm": 0.21864843368530273, "learning_rate": 1e-06, "loss": 0.0163, "step": 1027 }, { "clip_ratio/high_max": 0.0019118049858661834, "clip_ratio/high_mean": 0.0007328125557251042, "clip_ratio/low_mean": 0.00045826367500012566, "clip_ratio/low_min": 1.0997712706739549e-05, "clip_ratio/region_mean": 0.0011910762259503826, "epoch": 9.811661807580174, "grad_norm": 0.19601017236709595, "learning_rate": 1e-06, "loss": -0.0207, "step": 1028 }, { "clip_ratio/high_max": 0.0019309499803057406, "clip_ratio/high_mean": 0.0007837676221242873, "clip_ratio/low_mean": 0.00047378897170347045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012575565924635157, "epoch": 9.820991253644316, "grad_norm": 0.17612512409687042, "learning_rate": 1e-06, "loss": -0.0211, "step": 1029 }, { "clip_ratio/high_max": 0.0020811241811315995, "clip_ratio/high_mean": 0.0007893754882388748, "clip_ratio/low_mean": 0.0004162891345913522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001205664622830227, "epoch": 9.830320699708455, "grad_norm": 0.16400597989559174, "learning_rate": 1e-06, "loss": -0.0767, "step": 1030 }, { "clip_ratio/high_max": 0.0019207383556931745, "clip_ratio/high_mean": 0.0006824558367952704, "clip_ratio/low_mean": 0.00043734807331929915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011198038955626544, "epoch": 9.839650145772595, "grad_norm": 0.23622548580169678, "learning_rate": 1e-06, "loss": -0.0359, "step": 1031 }, { "clip_ratio/high_max": 0.0017744337856129277, "clip_ratio/high_mean": 0.0007038676212687278, "clip_ratio/low_mean": 0.00043727504316848353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011411426748964004, "epoch": 9.848979591836734, "grad_norm": 0.16788391768932343, "learning_rate": 1e-06, "loss": -0.0501, "step": 1032 }, { "clip_ratio/high_max": 0.0018753482545434963, "clip_ratio/high_mean": 0.0006174472546263132, "clip_ratio/low_mean": 0.0006773617924409336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012948090443387628, "epoch": 9.858309037900874, "grad_norm": 0.15707825124263763, "learning_rate": 1e-06, "loss": 0.0088, "step": 1033 }, { "clip_ratio/high_max": 0.0017209925499628298, "clip_ratio/high_mean": 0.0007214898396341596, "clip_ratio/low_mean": 0.0005045406660428853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012260304938536137, "epoch": 9.867638483965015, "grad_norm": 0.1527063250541687, "learning_rate": 1e-06, "loss": -0.0395, "step": 1034 }, { "clip_ratio/high_max": 0.002083565719658509, "clip_ratio/high_mean": 0.000846570381327183, "clip_ratio/low_mean": 0.000554320792616636, "clip_ratio/low_min": 2.2080905182519928e-05, "clip_ratio/region_mean": 0.001400891149387462, "epoch": 9.876967930029155, "grad_norm": 0.1727484166622162, "learning_rate": 1e-06, "loss": -0.0286, "step": 1035 }, { "clip_ratio/high_max": 0.00195695108777727, "clip_ratio/high_mean": 0.0007362613578152377, "clip_ratio/low_mean": 0.0005539048752325471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001290166204853449, "epoch": 9.886297376093294, "grad_norm": 0.17491671442985535, "learning_rate": 1e-06, "loss": -0.0512, "step": 1036 }, { "clip_ratio/high_max": 0.0021074680335004814, "clip_ratio/high_mean": 0.0008661402425786946, "clip_ratio/low_mean": 0.0005848921045981115, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014510323089780286, "epoch": 9.895626822157434, "grad_norm": 0.15207058191299438, "learning_rate": 1e-06, "loss": -0.0525, "step": 1037 }, { "clip_ratio/high_max": 0.0021660482307197526, "clip_ratio/high_mean": 0.0008454494854959194, "clip_ratio/low_mean": 0.0006677256205875892, "clip_ratio/low_min": 1.3180092537368182e-05, "clip_ratio/region_mean": 0.001513175153377233, "epoch": 10.00932944606414, "grad_norm": 0.19193072617053986, "learning_rate": 1e-06, "loss": -0.0304, "step": 1038 }, { "clip_ratio/high_max": 0.002159712377761025, "clip_ratio/high_mean": 0.0008169761204044335, "clip_ratio/low_mean": 0.000648494131382904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001465470249968348, "epoch": 10.018658892128279, "grad_norm": 0.19283096492290497, "learning_rate": 1e-06, "loss": -0.0053, "step": 1039 }, { "clip_ratio/high_max": 0.0020708161682705395, "clip_ratio/high_mean": 0.0009063771303772228, "clip_ratio/low_mean": 0.0007096985000316636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001616075613128487, "epoch": 10.02798833819242, "grad_norm": 0.20065517723560333, "learning_rate": 1e-06, "loss": -0.0307, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0710797991071429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 798.6757202148438, "completions/mean_terminated_length": 546.3685913085938, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 10.03731778425656, "frac_reward_zero_std": 0.680245578289032, "grad_norm": 0.17117252945899963, "learning_rate": 1e-06, "loss": -0.0771, "num_tokens": 602681471.0, "reward": 0.675362765789032, "reward_std": 0.13688106834888458, "rewards/simpleverify_reward/mean": 0.6753627061843872, "rewards/simpleverify_reward/std": 0.46825549006462097, "step": 1041 }, { "clip_ratio/high_max": 0.0021142881523701362, "clip_ratio/high_mean": 0.000665472145556123, "clip_ratio/low_mean": 0.00039938085137691814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010648529569152743, "epoch": 10.0466472303207, "grad_norm": 0.2391090989112854, "learning_rate": 1e-06, "loss": -0.0572, "step": 1042 }, { "clip_ratio/high_max": 0.0017631136142881587, "clip_ratio/high_mean": 0.0006520333754451713, "clip_ratio/low_mean": 0.0005126067026139935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011646400707832072, "epoch": 10.055976676384839, "grad_norm": 0.2460571825504303, "learning_rate": 1e-06, "loss": 0.0195, "step": 1043 }, { "clip_ratio/high_max": 0.00190213070527534, "clip_ratio/high_mean": 0.0007314919785130769, "clip_ratio/low_mean": 0.00036975373996028793, "clip_ratio/low_min": 2.1777002984890714e-05, "clip_ratio/region_mean": 0.0011012457216565963, "epoch": 10.06530612244898, "grad_norm": 0.13374176621437073, "learning_rate": 1e-06, "loss": -0.0606, "step": 1044 }, { "clip_ratio/high_max": 0.00192573934327811, "clip_ratio/high_mean": 0.0007407673710986273, "clip_ratio/low_mean": 0.0004090164395620377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001149783842265606, "epoch": 10.07463556851312, "grad_norm": 0.1908431053161621, "learning_rate": 1e-06, "loss": -0.049, "step": 1045 }, { "clip_ratio/high_max": 0.002194939930632245, "clip_ratio/high_mean": 0.00081325885548722, "clip_ratio/low_mean": 0.0006527060231746873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014659648586530238, "epoch": 10.08396501457726, "grad_norm": 0.18014021217823029, "learning_rate": 1e-06, "loss": -0.0106, "step": 1046 }, { "clip_ratio/high_max": 0.001649887779421988, "clip_ratio/high_mean": 0.000595117497141473, "clip_ratio/low_mean": 0.00056445256814186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011595700998441316, "epoch": 10.093294460641399, "grad_norm": 0.1579229086637497, "learning_rate": 1e-06, "loss": 0.0135, "step": 1047 }, { "clip_ratio/high_max": 0.0018313334148842841, "clip_ratio/high_mean": 0.0007626730021002004, "clip_ratio/low_mean": 0.0005279520391923143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001290625015826663, "epoch": 10.102623906705539, "grad_norm": 0.4924648106098175, "learning_rate": 1e-06, "loss": -0.0283, "step": 1048 }, { "clip_ratio/high_max": 0.0018951434212794993, "clip_ratio/high_mean": 0.0007207135222415673, "clip_ratio/low_mean": 0.0007011228990450036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014218364231055602, "epoch": 10.11195335276968, "grad_norm": 0.19913531839847565, "learning_rate": 1e-06, "loss": -0.0126, "step": 1049 }, { "clip_ratio/high_max": 0.0018622910502017476, "clip_ratio/high_mean": 0.0007351679505518405, "clip_ratio/low_mean": 0.0004896961618214846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012248640960024204, "epoch": 10.12128279883382, "grad_norm": 0.16033484041690826, "learning_rate": 1e-06, "loss": -0.0129, "step": 1050 }, { "clip_ratio/high_max": 0.0018166680456488393, "clip_ratio/high_mean": 0.0008038783253141446, "clip_ratio/low_mean": 0.0005238240373728331, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013277023899718188, "epoch": 10.130612244897959, "grad_norm": 0.19091400504112244, "learning_rate": 1e-06, "loss": -0.0429, "step": 1051 }, { "clip_ratio/high_max": 0.002131759414623957, "clip_ratio/high_mean": 0.0008847239987517241, "clip_ratio/low_mean": 0.0006034456282577594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00148816962246201, "epoch": 10.139941690962099, "grad_norm": 0.20108860731124878, "learning_rate": 1e-06, "loss": -0.0489, "step": 1052 }, { "clip_ratio/high_max": 0.0018883378943428397, "clip_ratio/high_mean": 0.0008280871425085934, "clip_ratio/low_mean": 0.0008120698430502671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016401569700974505, "epoch": 10.14927113702624, "grad_norm": 0.1823475956916809, "learning_rate": 1e-06, "loss": -0.0264, "step": 1053 }, { "clip_ratio/high_max": 0.002084287832985865, "clip_ratio/high_mean": 0.0008297029362438479, "clip_ratio/low_mean": 0.0006451527833633008, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014748557441635057, "epoch": 10.15860058309038, "grad_norm": 0.17142722010612488, "learning_rate": 1e-06, "loss": -0.0457, "step": 1054 }, { "clip_ratio/high_max": 0.0023319746323977597, "clip_ratio/high_mean": 0.0008945962399593554, "clip_ratio/low_mean": 0.0006629313456869568, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015575276011077221, "epoch": 10.167930029154519, "grad_norm": 0.16825301945209503, "learning_rate": 1e-06, "loss": -0.0392, "step": 1055 }, { "clip_ratio/high_max": 0.002288734627654776, "clip_ratio/high_mean": 0.0008507565198669909, "clip_ratio/low_mean": 0.0007202446586234146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015710011393821333, "epoch": 10.177259475218658, "grad_norm": 0.1449366807937622, "learning_rate": 1e-06, "loss": -0.0318, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0855189732142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 867.5109252929688, "completions/mean_terminated_length": 565.5941772460938, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 10.186588921282798, "frac_reward_zero_std": 0.691964328289032, "grad_norm": 0.16988149285316467, "learning_rate": 1e-06, "loss": -0.0494, "num_tokens": 611562531.0, "reward": 0.6497628688812256, "reward_std": 0.13506881892681122, "rewards/simpleverify_reward/mean": 0.6497628092765808, "rewards/simpleverify_reward/std": 0.4770607650279999, "step": 1057 }, { "clip_ratio/high_max": 0.0017359608227707213, "clip_ratio/high_mean": 0.0005711868652724661, "clip_ratio/low_mean": 0.0004482479198486544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010194347833021311, "epoch": 10.19591836734694, "grad_norm": 0.24584777653217316, "learning_rate": 1e-06, "loss": -0.0266, "step": 1058 }, { "clip_ratio/high_max": 0.0016817401847220026, "clip_ratio/high_mean": 0.0006931623865966685, "clip_ratio/low_mean": 0.000426816099206917, "clip_ratio/low_min": 9.221009349857923e-06, "clip_ratio/region_mean": 0.0011199784930795431, "epoch": 10.205247813411079, "grad_norm": 0.16640333831310272, "learning_rate": 1e-06, "loss": -0.0632, "step": 1059 }, { "clip_ratio/high_max": 0.0016564005418331362, "clip_ratio/high_mean": 0.0006491796680165862, "clip_ratio/low_mean": 0.0003990244140368304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010482041107024997, "epoch": 10.214577259475218, "grad_norm": 0.21573370695114136, "learning_rate": 1e-06, "loss": -0.0474, "step": 1060 }, { "clip_ratio/high_max": 0.0017388373235007748, "clip_ratio/high_mean": 0.0007418792811222374, "clip_ratio/low_mean": 0.00039452465125577874, "clip_ratio/low_min": 1.6983694877126254e-05, "clip_ratio/region_mean": 0.001136403934651753, "epoch": 10.223906705539358, "grad_norm": 0.13224484026432037, "learning_rate": 1e-06, "loss": -0.0422, "step": 1061 }, { "clip_ratio/high_max": 0.0018849876068998128, "clip_ratio/high_mean": 0.0006497544891317375, "clip_ratio/low_mean": 0.0005649208687827922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012146753797424026, "epoch": 10.2332361516035, "grad_norm": 0.16401737928390503, "learning_rate": 1e-06, "loss": -0.0414, "step": 1062 }, { "clip_ratio/high_max": 0.0019540362045518123, "clip_ratio/high_mean": 0.000683033955283463, "clip_ratio/low_mean": 0.00048530845651839627, "clip_ratio/low_min": 1.7071837646653876e-05, "clip_ratio/region_mean": 0.00116834239452146, "epoch": 10.242565597667639, "grad_norm": 0.18567335605621338, "learning_rate": 1e-06, "loss": -0.0328, "step": 1063 }, { "clip_ratio/high_max": 0.0016760933795012534, "clip_ratio/high_mean": 0.0006150909466668963, "clip_ratio/low_mean": 0.0004793109371803439, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010944018977170344, "epoch": 10.251895043731778, "grad_norm": 0.16446653008460999, "learning_rate": 1e-06, "loss": -0.0078, "step": 1064 }, { "clip_ratio/high_max": 0.0015547363182122353, "clip_ratio/high_mean": 0.0006742811920048553, "clip_ratio/low_mean": 0.00047631284519411565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011505940492497757, "epoch": 10.261224489795918, "grad_norm": 0.1537928432226181, "learning_rate": 1e-06, "loss": -0.0352, "step": 1065 }, { "clip_ratio/high_max": 0.0017656412601354532, "clip_ratio/high_mean": 0.0007315971179195913, "clip_ratio/low_mean": 0.0006798618114771671, "clip_ratio/low_min": 2.037157719314564e-05, "clip_ratio/region_mean": 0.0014114589175733272, "epoch": 10.270553935860057, "grad_norm": 0.14060604572296143, "learning_rate": 1e-06, "loss": -0.0158, "step": 1066 }, { "clip_ratio/high_max": 0.002139915170118911, "clip_ratio/high_mean": 0.0007512171414418845, "clip_ratio/low_mean": 0.0006912263133926899, "clip_ratio/low_min": 1.517727014288539e-05, "clip_ratio/region_mean": 0.0014424434666580055, "epoch": 10.279883381924199, "grad_norm": 0.17164021730422974, "learning_rate": 1e-06, "loss": -0.0235, "step": 1067 }, { "clip_ratio/high_max": 0.0022960917121963575, "clip_ratio/high_mean": 0.0008065370911936043, "clip_ratio/low_mean": 0.0006889023816256667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014954394682717975, "epoch": 10.289212827988338, "grad_norm": 0.17008215188980103, "learning_rate": 1e-06, "loss": -0.0039, "step": 1068 }, { "clip_ratio/high_max": 0.0021900596693740226, "clip_ratio/high_mean": 0.0009362786768178921, "clip_ratio/low_mean": 0.0005077882374280307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014440669328905642, "epoch": 10.298542274052478, "grad_norm": 0.15173810720443726, "learning_rate": 1e-06, "loss": -0.0564, "step": 1069 }, { "clip_ratio/high_max": 0.0022739927444490604, "clip_ratio/high_mean": 0.0008437618034804473, "clip_ratio/low_mean": 0.0005223061416472774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013660679396707565, "epoch": 10.307871720116617, "grad_norm": 0.3963997960090637, "learning_rate": 1e-06, "loss": -0.0842, "step": 1070 }, { "clip_ratio/high_max": 0.0019368517168913968, "clip_ratio/high_mean": 0.000728627506759949, "clip_ratio/low_mean": 0.0006163375583128072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013449650759866927, "epoch": 10.317201166180759, "grad_norm": 0.1993507593870163, "learning_rate": 1e-06, "loss": -0.02, "step": 1071 }, { "clip_ratio/high_max": 0.0020613192646123935, "clip_ratio/high_mean": 0.0007039549527689815, "clip_ratio/low_mean": 0.0005953898821644543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012993447999178898, "epoch": 10.326530612244898, "grad_norm": 0.2084777057170868, "learning_rate": 1e-06, "loss": -0.0225, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07763671875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 822.0482788085938, "completions/mean_terminated_length": 546.4747924804688, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 10.335860058309038, "frac_reward_zero_std": 0.6891741156578064, "grad_norm": 0.18331322073936462, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 620258535.0, "reward": 0.6720145344734192, "reward_std": 0.1364416927099228, "rewards/simpleverify_reward/mean": 0.6720145344734192, "rewards/simpleverify_reward/std": 0.46949589252471924, "step": 1073 }, { "clip_ratio/high_max": 0.0019294800331408624, "clip_ratio/high_mean": 0.0007304402670342824, "clip_ratio/low_mean": 0.0003617265756474808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010921668363153003, "epoch": 10.345189504373177, "grad_norm": 0.15711525082588196, "learning_rate": 1e-06, "loss": -0.0346, "step": 1074 }, { "clip_ratio/high_max": 0.001478888214478502, "clip_ratio/high_mean": 0.0006277092124946648, "clip_ratio/low_mean": 0.000440868999248778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001068578214471927, "epoch": 10.354518950437317, "grad_norm": 0.15108348429203033, "learning_rate": 1e-06, "loss": -0.0353, "step": 1075 }, { "clip_ratio/high_max": 0.0015833344332349952, "clip_ratio/high_mean": 0.000623417045972019, "clip_ratio/low_mean": 0.0004592848188167409, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010827018704731017, "epoch": 10.363848396501458, "grad_norm": 0.18519426882266998, "learning_rate": 1e-06, "loss": -0.0111, "step": 1076 }, { "clip_ratio/high_max": 0.0014691108808619902, "clip_ratio/high_mean": 0.0006035342257746379, "clip_ratio/low_mean": 0.00048680995291761064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001090344179829117, "epoch": 10.373177842565598, "grad_norm": 0.16744376718997955, "learning_rate": 1e-06, "loss": 0.0005, "step": 1077 }, { "clip_ratio/high_max": 0.0019882152882928494, "clip_ratio/high_mean": 0.0008480068090648274, "clip_ratio/low_mean": 0.0003873338800985948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012353407037153374, "epoch": 10.382507288629737, "grad_norm": 0.1644911766052246, "learning_rate": 1e-06, "loss": -0.0524, "step": 1078 }, { "clip_ratio/high_max": 0.0020583483601512853, "clip_ratio/high_mean": 0.0007722849204583326, "clip_ratio/low_mean": 0.0005127830643232301, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012850679595430847, "epoch": 10.391836734693877, "grad_norm": 0.1885521560907364, "learning_rate": 1e-06, "loss": -0.0558, "step": 1079 }, { "clip_ratio/high_max": 0.0019401207027840428, "clip_ratio/high_mean": 0.0007197784198069712, "clip_ratio/low_mean": 0.0005661926452376065, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012859710332122631, "epoch": 10.401166180758018, "grad_norm": 0.21013376116752625, "learning_rate": 1e-06, "loss": 0.0021, "step": 1080 }, { "clip_ratio/high_max": 0.002185987486882368, "clip_ratio/high_mean": 0.0007500017982238205, "clip_ratio/low_mean": 0.0005177888679099851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012677906488534063, "epoch": 10.410495626822158, "grad_norm": 0.17094485461711884, "learning_rate": 1e-06, "loss": -0.0334, "step": 1081 }, { "clip_ratio/high_max": 0.0018471715738996863, "clip_ratio/high_mean": 0.0007303419215531903, "clip_ratio/low_mean": 0.0005427040096037672, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012730459457088728, "epoch": 10.419825072886297, "grad_norm": 0.15183934569358826, "learning_rate": 1e-06, "loss": -0.0319, "step": 1082 }, { "clip_ratio/high_max": 0.002083048617350869, "clip_ratio/high_mean": 0.0007889515309216222, "clip_ratio/low_mean": 0.0005207639605941949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001309715495153796, "epoch": 10.429154518950437, "grad_norm": 0.17092710733413696, "learning_rate": 1e-06, "loss": -0.0575, "step": 1083 }, { "clip_ratio/high_max": 0.0022427023177442607, "clip_ratio/high_mean": 0.0008620372991572367, "clip_ratio/low_mean": 0.0006114915995567571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014735289078089409, "epoch": 10.438483965014576, "grad_norm": 0.1498129814863205, "learning_rate": 1e-06, "loss": -0.0652, "step": 1084 }, { "clip_ratio/high_max": 0.0017214549261552747, "clip_ratio/high_mean": 0.0007019369732006453, "clip_ratio/low_mean": 0.0006574823146365816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013594192605523858, "epoch": 10.447813411078718, "grad_norm": 0.2470574676990509, "learning_rate": 1e-06, "loss": -0.0379, "step": 1085 }, { "clip_ratio/high_max": 0.0020304538338677958, "clip_ratio/high_mean": 0.0007767761944705853, "clip_ratio/low_mean": 0.0006970031918172026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014737793899257667, "epoch": 10.457142857142857, "grad_norm": 0.15252156555652618, "learning_rate": 1e-06, "loss": -0.0492, "step": 1086 }, { "clip_ratio/high_max": 0.0023290964600164443, "clip_ratio/high_mean": 0.0008866315674822545, "clip_ratio/low_mean": 0.0006446559109463124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015312874602386728, "epoch": 10.466472303206997, "grad_norm": 60.34867477416992, "learning_rate": 1e-06, "loss": -0.0579, "step": 1087 }, { "clip_ratio/high_max": 0.0022937680296308827, "clip_ratio/high_mean": 0.0008359161893167766, "clip_ratio/low_mean": 0.0006865485665912274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015224647249851841, "epoch": 10.475801749271136, "grad_norm": 0.18487729132175446, "learning_rate": 1e-06, "loss": 0.0028, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0777762276785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 822.5419311523438, "completions/mean_terminated_length": 546.47314453125, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 10.485131195335278, "frac_reward_zero_std": 0.6969866156578064, "grad_norm": 0.33254748582839966, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 628936600.0, "reward": 0.6749442219734192, "reward_std": 0.13336391746997833, "rewards/simpleverify_reward/mean": 0.6749442219734192, "rewards/simpleverify_reward/std": 0.46841204166412354, "step": 1089 }, { "clip_ratio/high_max": 0.0016312984225805849, "clip_ratio/high_mean": 0.000615172129982966, "clip_ratio/low_mean": 0.0003825004832833656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009976726287277415, "epoch": 10.494460641399417, "grad_norm": 0.17349538207054138, "learning_rate": 1e-06, "loss": -0.0369, "step": 1090 }, { "clip_ratio/high_max": 0.0019661090191220865, "clip_ratio/high_mean": 0.0006818989259045338, "clip_ratio/low_mean": 0.0005707632226403803, "clip_ratio/low_min": 1.6578249415033497e-05, "clip_ratio/region_mean": 0.0012526621358119883, "epoch": 10.503790087463557, "grad_norm": 0.3489252030849457, "learning_rate": 1e-06, "loss": -0.0051, "step": 1091 }, { "clip_ratio/high_max": 0.001777749235770898, "clip_ratio/high_mean": 0.0006433501312130829, "clip_ratio/low_mean": 0.00040648000276632956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001049830139891128, "epoch": 10.513119533527696, "grad_norm": 0.18891800940036774, "learning_rate": 1e-06, "loss": -0.0478, "step": 1092 }, { "clip_ratio/high_max": 0.0015609364672855008, "clip_ratio/high_mean": 0.0005790243230876513, "clip_ratio/low_mean": 0.0005907692579967261, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011697935660777148, "epoch": 10.522448979591836, "grad_norm": 0.17817319929599762, "learning_rate": 1e-06, "loss": 0.0108, "step": 1093 }, { "clip_ratio/high_max": 0.00203627219889313, "clip_ratio/high_mean": 0.0007919592699181521, "clip_ratio/low_mean": 0.00036367969619277574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011556389690667856, "epoch": 10.531778425655977, "grad_norm": 0.13944748044013977, "learning_rate": 1e-06, "loss": -0.061, "step": 1094 }, { "clip_ratio/high_max": 0.0021321947242540773, "clip_ratio/high_mean": 0.0007186846696640714, "clip_ratio/low_mean": 0.0005186649495954043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012373496374493698, "epoch": 10.541107871720117, "grad_norm": 0.16078974306583405, "learning_rate": 1e-06, "loss": -0.0451, "step": 1095 }, { "clip_ratio/high_max": 0.0020263360893295612, "clip_ratio/high_mean": 0.0006927729455128429, "clip_ratio/low_mean": 0.00043552134366109385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011282943014521152, "epoch": 10.550437317784256, "grad_norm": 0.14787344634532928, "learning_rate": 1e-06, "loss": -0.04, "step": 1096 }, { "clip_ratio/high_max": 0.0022967680415604264, "clip_ratio/high_mean": 0.0008725091338419588, "clip_ratio/low_mean": 0.0006267482949624537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014992574251664337, "epoch": 10.559766763848396, "grad_norm": 0.17783017456531525, "learning_rate": 1e-06, "loss": -0.004, "step": 1097 }, { "clip_ratio/high_max": 0.002465939112880733, "clip_ratio/high_mean": 0.0008625999325886369, "clip_ratio/low_mean": 0.0005681917264155345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014307916317193303, "epoch": 10.569096209912537, "grad_norm": 0.17075182497501373, "learning_rate": 1e-06, "loss": -0.0596, "step": 1098 }, { "clip_ratio/high_max": 0.002098060449498007, "clip_ratio/high_mean": 0.0007631910575582879, "clip_ratio/low_mean": 0.0005919801333220676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013551711817854084, "epoch": 10.578425655976677, "grad_norm": 0.19616743922233582, "learning_rate": 1e-06, "loss": -0.038, "step": 1099 }, { "clip_ratio/high_max": 0.0018755121891445015, "clip_ratio/high_mean": 0.0007639687373739434, "clip_ratio/low_mean": 0.00047291034024965484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012368790776235983, "epoch": 10.587755102040816, "grad_norm": 0.16690245270729065, "learning_rate": 1e-06, "loss": -0.0367, "step": 1100 }, { "clip_ratio/high_max": 0.0023163973019109108, "clip_ratio/high_mean": 0.0008133127594192047, "clip_ratio/low_mean": 0.0006035823621459713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001416895156580722, "epoch": 10.597084548104956, "grad_norm": 0.3957762122154236, "learning_rate": 1e-06, "loss": -0.0344, "step": 1101 }, { "clip_ratio/high_max": 0.002547283496824093, "clip_ratio/high_mean": 0.0008936933027143823, "clip_ratio/low_mean": 0.0007459514272341039, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016396447463193908, "epoch": 10.606413994169095, "grad_norm": 0.15641573071479797, "learning_rate": 1e-06, "loss": -0.0385, "step": 1102 }, { "clip_ratio/high_max": 0.0023444023318006657, "clip_ratio/high_mean": 0.001024219011014793, "clip_ratio/low_mean": 0.0007017761672614142, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001725995163724292, "epoch": 10.615743440233237, "grad_norm": 0.15097039937973022, "learning_rate": 1e-06, "loss": -0.0554, "step": 1103 }, { "clip_ratio/high_max": 0.002393023041804554, "clip_ratio/high_mean": 0.0009241655770892976, "clip_ratio/low_mean": 0.0007037202003630227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016278857438010164, "epoch": 10.625072886297376, "grad_norm": 0.16306771337985992, "learning_rate": 1e-06, "loss": -0.0372, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0768694196428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 818.8446655273438, "completions/mean_terminated_length": 545.9547119140625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 10.634402332361516, "frac_reward_zero_std": 0.6897321939468384, "grad_norm": 0.19711828231811523, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 637559549.0, "reward": 0.681222140789032, "reward_std": 0.13518932461738586, "rewards/simpleverify_reward/mean": 0.6812220811843872, "rewards/simpleverify_reward/std": 0.4660189747810364, "step": 1105 }, { "clip_ratio/high_max": 0.0015610785376338754, "clip_ratio/high_mean": 0.0005935062863500207, "clip_ratio/low_mean": 0.0003176737091052928, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009111799990932923, "epoch": 10.643731778425655, "grad_norm": 0.1872796267271042, "learning_rate": 1e-06, "loss": -0.079, "step": 1106 }, { "clip_ratio/high_max": 0.0020443053654162213, "clip_ratio/high_mean": 0.0006997932496233261, "clip_ratio/low_mean": 0.00042049508147101733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011202883397345431, "epoch": 10.653061224489797, "grad_norm": 0.16448546946048737, "learning_rate": 1e-06, "loss": -0.056, "step": 1107 }, { "clip_ratio/high_max": 0.0015473074563487899, "clip_ratio/high_mean": 0.000544879676454002, "clip_ratio/low_mean": 0.00041051085781873553, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009553905401844531, "epoch": 10.662390670553936, "grad_norm": 0.161103755235672, "learning_rate": 1e-06, "loss": 0.0068, "step": 1108 }, { "clip_ratio/high_max": 0.0018564604251878336, "clip_ratio/high_mean": 0.0006968301477172645, "clip_ratio/low_mean": 0.0003616216745285783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010584518149698852, "epoch": 10.671720116618076, "grad_norm": 0.1994725465774536, "learning_rate": 1e-06, "loss": -0.0109, "step": 1109 }, { "clip_ratio/high_max": 0.001901872325106524, "clip_ratio/high_mean": 0.0006658432575932238, "clip_ratio/low_mean": 0.0005905428270125412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012563860855152598, "epoch": 10.681049562682215, "grad_norm": 0.22094249725341797, "learning_rate": 1e-06, "loss": -0.0202, "step": 1110 }, { "clip_ratio/high_max": 0.0019523299488355406, "clip_ratio/high_mean": 0.0007269930702022975, "clip_ratio/low_mean": 0.00042371965218990226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011507127437653253, "epoch": 10.690379008746355, "grad_norm": 0.1628618985414505, "learning_rate": 1e-06, "loss": -0.0558, "step": 1111 }, { "clip_ratio/high_max": 0.0016521617872058414, "clip_ratio/high_mean": 0.000656830034131417, "clip_ratio/low_mean": 0.0005508878039108822, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012077178762410767, "epoch": 10.699708454810496, "grad_norm": 0.176545187830925, "learning_rate": 1e-06, "loss": -0.0343, "step": 1112 }, { "clip_ratio/high_max": 0.0021427500832942314, "clip_ratio/high_mean": 0.0007997699212864973, "clip_ratio/low_mean": 0.0004877705059698201, "clip_ratio/low_min": 1.526624328107573e-05, "clip_ratio/region_mean": 0.001287540428165812, "epoch": 10.709037900874636, "grad_norm": 0.42195314168930054, "learning_rate": 1e-06, "loss": -0.0345, "step": 1113 }, { "clip_ratio/high_max": 0.0018630820559337735, "clip_ratio/high_mean": 0.000752081929022097, "clip_ratio/low_mean": 0.0006888483349030139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001440930263925111, "epoch": 10.718367346938775, "grad_norm": 0.2006247192621231, "learning_rate": 1e-06, "loss": -0.034, "step": 1114 }, { "clip_ratio/high_max": 0.0024403847419307567, "clip_ratio/high_mean": 0.0007920165426185122, "clip_ratio/low_mean": 0.0005273210908853798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001319337658060249, "epoch": 10.727696793002915, "grad_norm": 0.24720732867717743, "learning_rate": 1e-06, "loss": -0.0314, "step": 1115 }, { "clip_ratio/high_max": 0.001754779146722285, "clip_ratio/high_mean": 0.0006965457469050307, "clip_ratio/low_mean": 0.0005360855766411987, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012326313444646075, "epoch": 10.737026239067056, "grad_norm": 0.15585944056510925, "learning_rate": 1e-06, "loss": -0.0238, "step": 1116 }, { "clip_ratio/high_max": 0.002225590134912636, "clip_ratio/high_mean": 0.0009030855235323543, "clip_ratio/low_mean": 0.00043081909734610235, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001333904638158856, "epoch": 10.746355685131196, "grad_norm": 0.1548539251089096, "learning_rate": 1e-06, "loss": -0.1059, "step": 1117 }, { "clip_ratio/high_max": 0.0018461347135598771, "clip_ratio/high_mean": 0.0007682860923523549, "clip_ratio/low_mean": 0.0006940474886505399, "clip_ratio/low_min": 4.9798494728747755e-05, "clip_ratio/region_mean": 0.0014623335482610855, "epoch": 10.755685131195335, "grad_norm": 0.15853171050548553, "learning_rate": 1e-06, "loss": -0.0559, "step": 1118 }, { "clip_ratio/high_max": 0.0024786294234218076, "clip_ratio/high_mean": 0.0008620214557595318, "clip_ratio/low_mean": 0.0007861185667934478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001648140012548538, "epoch": 10.765014577259475, "grad_norm": 0.15479297935962677, "learning_rate": 1e-06, "loss": -0.0165, "step": 1119 }, { "clip_ratio/high_max": 0.0019202508192392997, "clip_ratio/high_mean": 0.0007060389034450054, "clip_ratio/low_mean": 0.0008572725482736132, "clip_ratio/low_min": 3.8343558117048815e-05, "clip_ratio/region_mean": 0.0015633114526281133, "epoch": 10.774344023323614, "grad_norm": 0.2031652331352234, "learning_rate": 1e-06, "loss": -0.0085, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0832868303571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3950.0, "completions/mean_length": 842.6815185546875, "completions/mean_terminated_length": 547.1053466796875, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 10.783673469387756, "frac_reward_zero_std": 0.6835938096046448, "grad_norm": 0.20187096297740936, "learning_rate": 1e-06, "loss": -0.0414, "num_tokens": 646137815.0, "reward": 0.6739676594734192, "reward_std": 0.13773725926876068, "rewards/simpleverify_reward/mean": 0.6739676594734192, "rewards/simpleverify_reward/std": 0.4687756299972534, "step": 1121 }, { "clip_ratio/high_max": 0.0017909975431393832, "clip_ratio/high_mean": 0.0006787763304600958, "clip_ratio/low_mean": 0.00032097662278829375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009997529632528313, "epoch": 10.793002915451895, "grad_norm": 0.17148080468177795, "learning_rate": 1e-06, "loss": -0.055, "step": 1122 }, { "clip_ratio/high_max": 0.0016082797483250033, "clip_ratio/high_mean": 0.000576650447328575, "clip_ratio/low_mean": 0.00043928943250648445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010159398916584905, "epoch": 10.802332361516035, "grad_norm": 0.17019732296466827, "learning_rate": 1e-06, "loss": -0.0172, "step": 1123 }, { "clip_ratio/high_max": 0.0018259788848808967, "clip_ratio/high_mean": 0.0006409966827050084, "clip_ratio/low_mean": 0.0004093186316822539, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010503153098397888, "epoch": 10.811661807580174, "grad_norm": 0.2016351968050003, "learning_rate": 1e-06, "loss": 0.0125, "step": 1124 }, { "clip_ratio/high_max": 0.0017253124751732685, "clip_ratio/high_mean": 0.0006275339692365378, "clip_ratio/low_mean": 0.00040133252150553744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010288664961990435, "epoch": 10.820991253644316, "grad_norm": 0.1555991768836975, "learning_rate": 1e-06, "loss": -0.037, "step": 1125 }, { "clip_ratio/high_max": 0.0015974678863130976, "clip_ratio/high_mean": 0.0006712905305903405, "clip_ratio/low_mean": 0.0004263216460458352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010976121884596068, "epoch": 10.830320699708455, "grad_norm": 0.16266688704490662, "learning_rate": 1e-06, "loss": -0.0672, "step": 1126 }, { "clip_ratio/high_max": 0.0018124190755770542, "clip_ratio/high_mean": 0.0007258192781591788, "clip_ratio/low_mean": 0.00042954662876582006, "clip_ratio/low_min": 1.147842067439342e-05, "clip_ratio/region_mean": 0.0011553659242053982, "epoch": 10.839650145772595, "grad_norm": 0.20607616007328033, "learning_rate": 1e-06, "loss": -0.0317, "step": 1127 }, { "clip_ratio/high_max": 0.001813215079891961, "clip_ratio/high_mean": 0.0006425579936149006, "clip_ratio/low_mean": 0.0005581455861829454, "clip_ratio/low_min": 3.3174099371535704e-05, "clip_ratio/region_mean": 0.0012007036020804662, "epoch": 10.848979591836734, "grad_norm": 0.1741359382867813, "learning_rate": 1e-06, "loss": 0.0217, "step": 1128 }, { "clip_ratio/high_max": 0.0017290683281316888, "clip_ratio/high_mean": 0.0006956594679650152, "clip_ratio/low_mean": 0.0005308724530550535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012265319310245104, "epoch": 10.858309037900874, "grad_norm": 0.20789490640163422, "learning_rate": 1e-06, "loss": -0.029, "step": 1129 }, { "clip_ratio/high_max": 0.0021272173871693667, "clip_ratio/high_mean": 0.0008495976890117163, "clip_ratio/low_mean": 0.0005002335033168492, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013498312218871433, "epoch": 10.867638483965015, "grad_norm": 0.19632600247859955, "learning_rate": 1e-06, "loss": -0.0543, "step": 1130 }, { "clip_ratio/high_max": 0.0018723712528299075, "clip_ratio/high_mean": 0.0007610500433656853, "clip_ratio/low_mean": 0.0005949675187366665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013560175684688147, "epoch": 10.876967930029155, "grad_norm": 0.20463590323925018, "learning_rate": 1e-06, "loss": -0.0313, "step": 1131 }, { "clip_ratio/high_max": 0.0023173220797616523, "clip_ratio/high_mean": 0.0008629509757156484, "clip_ratio/low_mean": 0.000603482881160744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014664338414149825, "epoch": 10.886297376093294, "grad_norm": 0.1889830231666565, "learning_rate": 1e-06, "loss": -0.0373, "step": 1132 }, { "clip_ratio/high_max": 0.002014058714848943, "clip_ratio/high_mean": 0.0008416111904807622, "clip_ratio/low_mean": 0.0006512324896448263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014928436794434674, "epoch": 10.895626822157434, "grad_norm": 0.17109835147857666, "learning_rate": 1e-06, "loss": -0.0194, "step": 1133 }, { "clip_ratio/high_max": 0.0022250907932175323, "clip_ratio/high_mean": 0.0008791262080194429, "clip_ratio/low_mean": 0.0004665404176193988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013456666565616615, "epoch": 11.00932944606414, "grad_norm": 0.15316836535930634, "learning_rate": 1e-06, "loss": -0.0745, "step": 1134 }, { "clip_ratio/high_max": 0.0020041695242980495, "clip_ratio/high_mean": 0.0007852609178371495, "clip_ratio/low_mean": 0.0005903584351472091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013756193438894115, "epoch": 11.018658892128279, "grad_norm": 0.20048338174819946, "learning_rate": 1e-06, "loss": -0.0231, "step": 1135 }, { "clip_ratio/high_max": 0.0020115052102482878, "clip_ratio/high_mean": 0.0008700038106326247, "clip_ratio/low_mean": 0.0005790729899217695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014490767825918738, "epoch": 11.02798833819242, "grad_norm": 0.21087542176246643, "learning_rate": 1e-06, "loss": -0.0553, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 827.6454467773438, "completions/mean_terminated_length": 550.666259765625, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 11.03731778425656, "frac_reward_zero_std": 0.699776828289032, "grad_norm": 0.1916263848543167, "learning_rate": 1e-06, "loss": -0.0357, "num_tokens": 654830892.0, "reward": 0.6767578721046448, "reward_std": 0.13004133105278015, "rewards/simpleverify_reward/mean": 0.6767578125, "rewards/simpleverify_reward/std": 0.4677306115627289, "step": 1137 }, { "clip_ratio/high_max": 0.0017678840613370994, "clip_ratio/high_mean": 0.0005944545064267004, "clip_ratio/low_mean": 0.0004487711094043334, "clip_ratio/low_min": 4.140901000937447e-05, "clip_ratio/region_mean": 0.0010432255858177086, "epoch": 11.0466472303207, "grad_norm": 0.17101596295833588, "learning_rate": 1e-06, "loss": -0.0327, "step": 1138 }, { "clip_ratio/high_max": 0.0016050742524384987, "clip_ratio/high_mean": 0.0006081964138502371, "clip_ratio/low_mean": 0.0003504585336031596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000958654954956728, "epoch": 11.055976676384839, "grad_norm": 0.198826864361763, "learning_rate": 1e-06, "loss": -0.0167, "step": 1139 }, { "clip_ratio/high_max": 0.001775891680154018, "clip_ratio/high_mean": 0.0006987477863731328, "clip_ratio/low_mean": 0.00034736257839540485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010461103920533787, "epoch": 11.06530612244898, "grad_norm": 0.20025385916233063, "learning_rate": 1e-06, "loss": -0.046, "step": 1140 }, { "clip_ratio/high_max": 0.0016357771710318048, "clip_ratio/high_mean": 0.0005906113219680265, "clip_ratio/low_mean": 0.0004720095676020719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010626209041220136, "epoch": 11.07463556851312, "grad_norm": 0.1835038959980011, "learning_rate": 1e-06, "loss": -0.0189, "step": 1141 }, { "clip_ratio/high_max": 0.0018436182872392237, "clip_ratio/high_mean": 0.0007245302222145256, "clip_ratio/low_mean": 0.0004066754147515894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011312056631140877, "epoch": 11.08396501457726, "grad_norm": 0.16329391300678253, "learning_rate": 1e-06, "loss": -0.0601, "step": 1142 }, { "clip_ratio/high_max": 0.0016961784713203087, "clip_ratio/high_mean": 0.000681867333696573, "clip_ratio/low_mean": 0.0004947663574057515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011766336974687874, "epoch": 11.093294460641399, "grad_norm": 0.18449606001377106, "learning_rate": 1e-06, "loss": -0.03, "step": 1143 }, { "clip_ratio/high_max": 0.0019206753495382145, "clip_ratio/high_mean": 0.000760139573685592, "clip_ratio/low_mean": 0.0005266381535875553, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012867777732026298, "epoch": 11.102623906705539, "grad_norm": 0.1559375673532486, "learning_rate": 1e-06, "loss": -0.0243, "step": 1144 }, { "clip_ratio/high_max": 0.0017519005450594705, "clip_ratio/high_mean": 0.0006401638602255844, "clip_ratio/low_mean": 0.0006057768705431954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012459407444112003, "epoch": 11.11195335276968, "grad_norm": 0.1801086664199829, "learning_rate": 1e-06, "loss": -0.006, "step": 1145 }, { "clip_ratio/high_max": 0.0015093826768861618, "clip_ratio/high_mean": 0.000583086372444086, "clip_ratio/low_mean": 0.0005685460482709459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001151632433902705, "epoch": 11.12128279883382, "grad_norm": 0.13842403888702393, "learning_rate": 1e-06, "loss": -0.0082, "step": 1146 }, { "clip_ratio/high_max": 0.001932076018420048, "clip_ratio/high_mean": 0.0007152906655392144, "clip_ratio/low_mean": 0.0005045184930168034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012198091462778393, "epoch": 11.130612244897959, "grad_norm": 0.1781458854675293, "learning_rate": 1e-06, "loss": -0.0479, "step": 1147 }, { "clip_ratio/high_max": 0.0020242018181306776, "clip_ratio/high_mean": 0.0007605909213452833, "clip_ratio/low_mean": 0.0005634091339743463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013240000407677144, "epoch": 11.139941690962099, "grad_norm": 0.15394379198551178, "learning_rate": 1e-06, "loss": -0.0337, "step": 1148 }, { "clip_ratio/high_max": 0.0017894217089633457, "clip_ratio/high_mean": 0.0007342949666053755, "clip_ratio/low_mean": 0.0007420426391036017, "clip_ratio/low_min": 1.549330772832036e-05, "clip_ratio/region_mean": 0.0014763376166229136, "epoch": 11.14927113702624, "grad_norm": 7.0090742111206055, "learning_rate": 1e-06, "loss": -0.0152, "step": 1149 }, { "clip_ratio/high_max": 0.002102406469930429, "clip_ratio/high_mean": 0.0008469400545436656, "clip_ratio/low_mean": 0.0005811972723677172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014281372968980577, "epoch": 11.15860058309038, "grad_norm": 0.16473160684108734, "learning_rate": 1e-06, "loss": -0.0338, "step": 1150 }, { "clip_ratio/high_max": 0.0020962019043508917, "clip_ratio/high_mean": 0.0008138018611134612, "clip_ratio/low_mean": 0.0007342968947341433, "clip_ratio/low_min": 1.848018837335985e-05, "clip_ratio/region_mean": 0.001548098818602739, "epoch": 11.167930029154519, "grad_norm": 0.18615761399269104, "learning_rate": 1e-06, "loss": -0.0259, "step": 1151 }, { "clip_ratio/high_max": 0.0018348311496083625, "clip_ratio/high_mean": 0.0007444254370057024, "clip_ratio/low_mean": 0.0006070613453630358, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013514867896446958, "epoch": 11.177259475218658, "grad_norm": 0.17484833300113678, "learning_rate": 1e-06, "loss": -0.0815, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0840541294642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 850.0433959960938, "completions/mean_terminated_length": 552.1698608398438, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 11.186588921282798, "frac_reward_zero_std": 0.6735491156578064, "grad_norm": 0.1860561966896057, "learning_rate": 1e-06, "loss": -0.0269, "num_tokens": 663521690.0, "reward": 0.6780134439468384, "reward_std": 0.14223964512348175, "rewards/simpleverify_reward/mean": 0.6780133843421936, "rewards/simpleverify_reward/std": 0.4672541618347168, "step": 1153 }, { "clip_ratio/high_max": 0.0013595425698440522, "clip_ratio/high_mean": 0.0004985205468983622, "clip_ratio/low_mean": 0.0005320165801094845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010305371251888573, "epoch": 11.19591836734694, "grad_norm": 0.1729787141084671, "learning_rate": 1e-06, "loss": 0.0006, "step": 1154 }, { "clip_ratio/high_max": 0.0023920312196423765, "clip_ratio/high_mean": 0.0008053251749515766, "clip_ratio/low_mean": 0.0004455505634268775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012508757390605751, "epoch": 11.205247813411079, "grad_norm": 0.18549862504005432, "learning_rate": 1e-06, "loss": -0.0581, "step": 1155 }, { "clip_ratio/high_max": 0.0017541957713547163, "clip_ratio/high_mean": 0.0006910410756972851, "clip_ratio/low_mean": 0.0003990241930296179, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00109006527782185, "epoch": 11.214577259475218, "grad_norm": 0.18506912887096405, "learning_rate": 1e-06, "loss": -0.072, "step": 1156 }, { "clip_ratio/high_max": 0.0017408281710231677, "clip_ratio/high_mean": 0.0006765333273506258, "clip_ratio/low_mean": 0.00041666775291560043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001093201030016644, "epoch": 11.223906705539358, "grad_norm": 0.19058731198310852, "learning_rate": 1e-06, "loss": -0.0313, "step": 1157 }, { "clip_ratio/high_max": 0.002010899901506491, "clip_ratio/high_mean": 0.0007908936659077881, "clip_ratio/low_mean": 0.0005677167082467349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013586103850684594, "epoch": 11.2332361516035, "grad_norm": 0.9535545706748962, "learning_rate": 1e-06, "loss": -0.0117, "step": 1158 }, { "clip_ratio/high_max": 0.002011584841966396, "clip_ratio/high_mean": 0.0007322798865061486, "clip_ratio/low_mean": 0.0005185488716961117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012508287400123663, "epoch": 11.242565597667639, "grad_norm": 0.18431510031223297, "learning_rate": 1e-06, "loss": -0.0583, "step": 1159 }, { "clip_ratio/high_max": 0.0021300434091244824, "clip_ratio/high_mean": 0.0008632733679405646, "clip_ratio/low_mean": 0.0005130674871907104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001376340846036328, "epoch": 11.251895043731778, "grad_norm": 0.1738830953836441, "learning_rate": 1e-06, "loss": -0.0698, "step": 1160 }, { "clip_ratio/high_max": 0.0018154821591451764, "clip_ratio/high_mean": 0.0007090763610904105, "clip_ratio/low_mean": 0.0005790866780444048, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012881630027550273, "epoch": 11.261224489795918, "grad_norm": 0.19961421191692352, "learning_rate": 1e-06, "loss": -0.0019, "step": 1161 }, { "clip_ratio/high_max": 0.0019155304544256069, "clip_ratio/high_mean": 0.0007668724538234528, "clip_ratio/low_mean": 0.000594729783188086, "clip_ratio/low_min": 2.501000381016638e-05, "clip_ratio/region_mean": 0.0013616022260976024, "epoch": 11.270553935860057, "grad_norm": 0.16569894552230835, "learning_rate": 1e-06, "loss": -0.0635, "step": 1162 }, { "clip_ratio/high_max": 0.0020262390171410516, "clip_ratio/high_mean": 0.000763666470447788, "clip_ratio/low_mean": 0.0006696511245536385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001433317578630522, "epoch": 11.279883381924199, "grad_norm": 0.16044855117797852, "learning_rate": 1e-06, "loss": -0.0322, "step": 1163 }, { "clip_ratio/high_max": 0.002265947823616443, "clip_ratio/high_mean": 0.0008358756131201517, "clip_ratio/low_mean": 0.0007722582704445813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016081338908406906, "epoch": 11.289212827988338, "grad_norm": 0.1700512170791626, "learning_rate": 1e-06, "loss": -0.0218, "step": 1164 }, { "clip_ratio/high_max": 0.0021697901029256172, "clip_ratio/high_mean": 0.0008273774037661497, "clip_ratio/low_mean": 0.0006487095743068494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014760870035388507, "epoch": 11.298542274052478, "grad_norm": 0.15288901329040527, "learning_rate": 1e-06, "loss": -0.0553, "step": 1165 }, { "clip_ratio/high_max": 0.002123364854924148, "clip_ratio/high_mean": 0.0007126045475160936, "clip_ratio/low_mean": 0.0006869947028462775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001399599241267424, "epoch": 11.307871720116617, "grad_norm": 0.16187191009521484, "learning_rate": 1e-06, "loss": -0.0701, "step": 1166 }, { "clip_ratio/high_max": 0.002826680436555762, "clip_ratio/high_mean": 0.001084075194739853, "clip_ratio/low_mean": 0.0006558276049872802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017399027819919866, "epoch": 11.317201166180759, "grad_norm": 0.17485423386096954, "learning_rate": 1e-06, "loss": -0.0726, "step": 1167 }, { "clip_ratio/high_max": 0.0025568780038156547, "clip_ratio/high_mean": 0.0010701780938688898, "clip_ratio/low_mean": 0.0007275061470863875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001797684220946394, "epoch": 11.326530612244898, "grad_norm": 1.3297151327133179, "learning_rate": 1e-06, "loss": -0.0638, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0793108258928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 823.3911743164062, "completions/mean_terminated_length": 541.4791259765625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 11.335860058309038, "frac_reward_zero_std": 0.6930803656578064, "grad_norm": 0.1895296424627304, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 672117769.0, "reward": 0.6791294813156128, "reward_std": 0.13220249116420746, "rewards/simpleverify_reward/mean": 0.6791294813156128, "rewards/simpleverify_reward/std": 0.4668274223804474, "step": 1169 }, { "clip_ratio/high_max": 0.0016120809013955295, "clip_ratio/high_mean": 0.0005927190977672581, "clip_ratio/low_mean": 0.00033948248528758995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009322015685029328, "epoch": 11.345189504373177, "grad_norm": 0.161264106631279, "learning_rate": 1e-06, "loss": -0.0344, "step": 1170 }, { "clip_ratio/high_max": 0.0019959576238761656, "clip_ratio/high_mean": 0.0006909567200636957, "clip_ratio/low_mean": 0.0003684030896238255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010593598017294426, "epoch": 11.354518950437317, "grad_norm": 0.1643170565366745, "learning_rate": 1e-06, "loss": -0.0725, "step": 1171 }, { "clip_ratio/high_max": 0.0020908584629069082, "clip_ratio/high_mean": 0.000779731964939856, "clip_ratio/low_mean": 0.0004266338792149327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012063658577972092, "epoch": 11.363848396501458, "grad_norm": 0.3383708596229553, "learning_rate": 1e-06, "loss": -0.0453, "step": 1172 }, { "clip_ratio/high_max": 0.0020177868063910864, "clip_ratio/high_mean": 0.0008225943147408543, "clip_ratio/low_mean": 0.00042660713734221645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012492014648159966, "epoch": 11.373177842565598, "grad_norm": 0.1673111617565155, "learning_rate": 1e-06, "loss": -0.0439, "step": 1173 }, { "clip_ratio/high_max": 0.00182940899321693, "clip_ratio/high_mean": 0.0007208046054074657, "clip_ratio/low_mean": 0.0005719893097193562, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001292793906031875, "epoch": 11.382507288629737, "grad_norm": 0.16559411585330963, "learning_rate": 1e-06, "loss": -0.0181, "step": 1174 }, { "clip_ratio/high_max": 0.001771357037796406, "clip_ratio/high_mean": 0.0006864273918836261, "clip_ratio/low_mean": 0.0004274192583579861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011138466670672642, "epoch": 11.391836734693877, "grad_norm": 0.21306878328323364, "learning_rate": 1e-06, "loss": -0.0289, "step": 1175 }, { "clip_ratio/high_max": 0.001910840574055328, "clip_ratio/high_mean": 0.0007389796292045503, "clip_ratio/low_mean": 0.0005342542112884985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012732338582281955, "epoch": 11.401166180758018, "grad_norm": 0.1605527102947235, "learning_rate": 1e-06, "loss": -0.0567, "step": 1176 }, { "clip_ratio/high_max": 0.0019968243141192943, "clip_ratio/high_mean": 0.0007217308248073095, "clip_ratio/low_mean": 0.0005365002771213767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012582311537698843, "epoch": 11.410495626822158, "grad_norm": 0.1748780459165573, "learning_rate": 1e-06, "loss": -0.0501, "step": 1177 }, { "clip_ratio/high_max": 0.0019419398631725926, "clip_ratio/high_mean": 0.0007158091866585892, "clip_ratio/low_mean": 0.0005713624341296963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012871716244262643, "epoch": 11.419825072886297, "grad_norm": 0.17018413543701172, "learning_rate": 1e-06, "loss": -0.0332, "step": 1178 }, { "clip_ratio/high_max": 0.0023733874259050936, "clip_ratio/high_mean": 0.0008626770049886545, "clip_ratio/low_mean": 0.0006219665729076951, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014846435442450456, "epoch": 11.429154518950437, "grad_norm": 0.20150460302829742, "learning_rate": 1e-06, "loss": -0.0533, "step": 1179 }, { "clip_ratio/high_max": 0.0021800849499413744, "clip_ratio/high_mean": 0.000875190020451555, "clip_ratio/low_mean": 0.0005858655695192283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014610555917897727, "epoch": 11.438483965014576, "grad_norm": 0.1383594125509262, "learning_rate": 1e-06, "loss": -0.0664, "step": 1180 }, { "clip_ratio/high_max": 0.0019043025240534917, "clip_ratio/high_mean": 0.0006990951660554856, "clip_ratio/low_mean": 0.0006656854129687417, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013647805790242273, "epoch": 11.447813411078718, "grad_norm": 0.21342889964580536, "learning_rate": 1e-06, "loss": -0.0168, "step": 1181 }, { "clip_ratio/high_max": 0.0019240249966969714, "clip_ratio/high_mean": 0.0006824288402640377, "clip_ratio/low_mean": 0.000716544920578599, "clip_ratio/low_min": 2.3447757484973408e-05, "clip_ratio/region_mean": 0.0013989737890369724, "epoch": 11.457142857142857, "grad_norm": 0.20573925971984863, "learning_rate": 1e-06, "loss": -0.0037, "step": 1182 }, { "clip_ratio/high_max": 0.002208284458902199, "clip_ratio/high_mean": 0.0008899109852791298, "clip_ratio/low_mean": 0.0006012579101479787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014911688776919618, "epoch": 11.466472303206997, "grad_norm": 0.1745227873325348, "learning_rate": 1e-06, "loss": -0.0184, "step": 1183 }, { "clip_ratio/high_max": 0.0023789019833202474, "clip_ratio/high_mean": 0.0009522772870695917, "clip_ratio/low_mean": 0.000585944948170436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015382222300104331, "epoch": 11.475801749271136, "grad_norm": 0.17794568836688995, "learning_rate": 1e-06, "loss": -0.0695, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0765206473214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 811.7434692382812, "completions/mean_terminated_length": 539.6058959960938, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 11.485131195335278, "frac_reward_zero_std": 0.699776828289032, "grad_norm": 0.1689937263727188, "learning_rate": 1e-06, "loss": -0.0633, "num_tokens": 680669307.0, "reward": 0.6896623969078064, "reward_std": 0.12979908287525177, "rewards/simpleverify_reward/mean": 0.6896623969078064, "rewards/simpleverify_reward/std": 0.462647944688797, "step": 1185 }, { "clip_ratio/high_max": 0.0016310309438267723, "clip_ratio/high_mean": 0.0005990013687551254, "clip_ratio/low_mean": 0.0004432505288605171, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010422519044368528, "epoch": 11.494460641399417, "grad_norm": 1.5891648530960083, "learning_rate": 1e-06, "loss": -0.0089, "step": 1186 }, { "clip_ratio/high_max": 0.0018713594254222699, "clip_ratio/high_mean": 0.0007019793029030552, "clip_ratio/low_mean": 0.00037519404122576816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010771733068395406, "epoch": 11.503790087463557, "grad_norm": 0.16163037717342377, "learning_rate": 1e-06, "loss": -0.0491, "step": 1187 }, { "clip_ratio/high_max": 0.0016048328216129448, "clip_ratio/high_mean": 0.0004987088223060709, "clip_ratio/low_mean": 0.0004936831969644118, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009923920151777565, "epoch": 11.513119533527696, "grad_norm": 0.20826947689056396, "learning_rate": 1e-06, "loss": 0.014, "step": 1188 }, { "clip_ratio/high_max": 0.0020944758216501214, "clip_ratio/high_mean": 0.0007864245017117355, "clip_ratio/low_mean": 0.0005079539150756318, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012943784458911978, "epoch": 11.522448979591836, "grad_norm": 0.31177371740341187, "learning_rate": 1e-06, "loss": -0.053, "step": 1189 }, { "clip_ratio/high_max": 0.0019571459051803686, "clip_ratio/high_mean": 0.0007689653020861442, "clip_ratio/low_mean": 0.00048626103784954466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012552263178804424, "epoch": 11.531778425655977, "grad_norm": 0.1877303421497345, "learning_rate": 1e-06, "loss": -0.0781, "step": 1190 }, { "clip_ratio/high_max": 0.002065192260488402, "clip_ratio/high_mean": 0.0006944797078176634, "clip_ratio/low_mean": 0.0004174225850874791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011119022710772697, "epoch": 11.541107871720117, "grad_norm": 0.7128690481185913, "learning_rate": 1e-06, "loss": -0.0354, "step": 1191 }, { "clip_ratio/high_max": 0.001997567313082982, "clip_ratio/high_mean": 0.0007160355416999664, "clip_ratio/low_mean": 0.0005031653267906222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012192008834972512, "epoch": 11.550437317784256, "grad_norm": 0.21461345255374908, "learning_rate": 1e-06, "loss": -0.029, "step": 1192 }, { "clip_ratio/high_max": 0.002148578168998938, "clip_ratio/high_mean": 0.0006751655623702391, "clip_ratio/low_mean": 0.0006984549490880454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001373620492813643, "epoch": 11.559766763848396, "grad_norm": 1.1211912631988525, "learning_rate": 1e-06, "loss": -0.0104, "step": 1193 }, { "clip_ratio/high_max": 0.002048307600489352, "clip_ratio/high_mean": 0.0007850661668271641, "clip_ratio/low_mean": 0.0005256767731225409, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013107429331284948, "epoch": 11.569096209912537, "grad_norm": 0.18824870884418488, "learning_rate": 1e-06, "loss": -0.0426, "step": 1194 }, { "clip_ratio/high_max": 0.0019071520364377648, "clip_ratio/high_mean": 0.0007524462180299452, "clip_ratio/low_mean": 0.0006784313454772928, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001430877557140775, "epoch": 11.578425655976677, "grad_norm": 0.23178794980049133, "learning_rate": 1e-06, "loss": -0.006, "step": 1195 }, { "clip_ratio/high_max": 0.002274401642353041, "clip_ratio/high_mean": 0.0008203393917938229, "clip_ratio/low_mean": 0.000695900145728956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015162395357037894, "epoch": 11.587755102040816, "grad_norm": 0.1998719424009323, "learning_rate": 1e-06, "loss": -0.0277, "step": 1196 }, { "clip_ratio/high_max": 0.0023895850681583397, "clip_ratio/high_mean": 0.0008788964569248492, "clip_ratio/low_mean": 0.0006737762271313841, "clip_ratio/low_min": 6.092805779189803e-05, "clip_ratio/region_mean": 0.0015526726747339126, "epoch": 11.597084548104956, "grad_norm": 0.3447187840938568, "learning_rate": 1e-06, "loss": -0.0599, "step": 1197 }, { "clip_ratio/high_max": 0.0021128683802089654, "clip_ratio/high_mean": 0.0007505947432946414, "clip_ratio/low_mean": 0.0007332621735258726, "clip_ratio/low_min": 1.6951451470959e-05, "clip_ratio/region_mean": 0.0014838569222774822, "epoch": 11.606413994169095, "grad_norm": 0.1450885832309723, "learning_rate": 1e-06, "loss": -0.0183, "step": 1198 }, { "clip_ratio/high_max": 0.0020537288510240614, "clip_ratio/high_mean": 0.0007904223293735413, "clip_ratio/low_mean": 0.0007765292266412871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015669515523768496, "epoch": 11.615743440233237, "grad_norm": 0.16557881236076355, "learning_rate": 1e-06, "loss": -0.0286, "step": 1199 }, { "clip_ratio/high_max": 0.0025270568221458234, "clip_ratio/high_mean": 0.0008935994155763183, "clip_ratio/low_mean": 0.0007613431980644236, "clip_ratio/low_min": 3.3614200219744816e-05, "clip_ratio/region_mean": 0.0016549426800338551, "epoch": 11.625072886297376, "grad_norm": 0.1890602707862854, "learning_rate": 1e-06, "loss": -0.016, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0877511160714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 856.1781616210938, "completions/mean_terminated_length": 544.532958984375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 11.634402332361516, "frac_reward_zero_std": 0.6902902126312256, "grad_norm": 0.2349931299686432, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 689244573.0, "reward": 0.6702009439468384, "reward_std": 0.13530337810516357, "rewards/simpleverify_reward/mean": 0.6702008843421936, "rewards/simpleverify_reward/std": 0.4701564311981201, "step": 1201 }, { "clip_ratio/high_max": 0.0014772420399822295, "clip_ratio/high_mean": 0.0005543290617424645, "clip_ratio/low_mean": 0.00037976387602611794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009340929464087822, "epoch": 11.643731778425655, "grad_norm": 0.17620255053043365, "learning_rate": 1e-06, "loss": -0.0352, "step": 1202 }, { "clip_ratio/high_max": 0.0016623389237793162, "clip_ratio/high_mean": 0.0006210441151779378, "clip_ratio/low_mean": 0.00034741963941087306, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009684637643658789, "epoch": 11.653061224489797, "grad_norm": 0.16394369304180145, "learning_rate": 1e-06, "loss": -0.0495, "step": 1203 }, { "clip_ratio/high_max": 0.0020965240109944716, "clip_ratio/high_mean": 0.0007423149045280297, "clip_ratio/low_mean": 0.0004029212786917924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011452361759438645, "epoch": 11.662390670553936, "grad_norm": 0.1681303232908249, "learning_rate": 1e-06, "loss": -0.0435, "step": 1204 }, { "clip_ratio/high_max": 0.001530017969344044, "clip_ratio/high_mean": 0.0005894053019801504, "clip_ratio/low_mean": 0.0003908914363819349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000980296747002285, "epoch": 11.671720116618076, "grad_norm": 0.1840282678604126, "learning_rate": 1e-06, "loss": -0.0127, "step": 1205 }, { "clip_ratio/high_max": 0.0016100245011330117, "clip_ratio/high_mean": 0.0004997503747290466, "clip_ratio/low_mean": 0.0003762770948014804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008760274686210323, "epoch": 11.681049562682215, "grad_norm": 0.2301138937473297, "learning_rate": 1e-06, "loss": -0.0162, "step": 1206 }, { "clip_ratio/high_max": 0.0018777306795527693, "clip_ratio/high_mean": 0.0007815293456587824, "clip_ratio/low_mean": 0.0005023669764341321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001283896326640388, "epoch": 11.690379008746355, "grad_norm": 0.24430108070373535, "learning_rate": 1e-06, "loss": -0.0094, "step": 1207 }, { "clip_ratio/high_max": 0.00208598483732203, "clip_ratio/high_mean": 0.0008314116948895389, "clip_ratio/low_mean": 0.0005159601878403919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001347371897281846, "epoch": 11.699708454810496, "grad_norm": 0.2100919634103775, "learning_rate": 1e-06, "loss": -0.0334, "step": 1208 }, { "clip_ratio/high_max": 0.0023020478256512433, "clip_ratio/high_mean": 0.0008213776454795152, "clip_ratio/low_mean": 0.0005133674108037667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001334745091298828, "epoch": 11.709037900874636, "grad_norm": 0.21932166814804077, "learning_rate": 1e-06, "loss": -0.0673, "step": 1209 }, { "clip_ratio/high_max": 0.0018564966849226039, "clip_ratio/high_mean": 0.000696360615620506, "clip_ratio/low_mean": 0.0005662058338202769, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012625664348888677, "epoch": 11.718367346938775, "grad_norm": 0.22318029403686523, "learning_rate": 1e-06, "loss": -0.0604, "step": 1210 }, { "clip_ratio/high_max": 0.001961336914973799, "clip_ratio/high_mean": 0.0007000751111263526, "clip_ratio/low_mean": 0.0005377161633077776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00123779128989554, "epoch": 11.727696793002915, "grad_norm": 0.19144804775714874, "learning_rate": 1e-06, "loss": -0.0209, "step": 1211 }, { "clip_ratio/high_max": 0.0014867949103063438, "clip_ratio/high_mean": 0.0006499498913399293, "clip_ratio/low_mean": 0.000537989360964275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011879392459377414, "epoch": 11.737026239067056, "grad_norm": 0.18445070087909698, "learning_rate": 1e-06, "loss": -0.0284, "step": 1212 }, { "clip_ratio/high_max": 0.002377647273533512, "clip_ratio/high_mean": 0.0008543151088815648, "clip_ratio/low_mean": 0.0005989043775116443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014532194691128097, "epoch": 11.746355685131196, "grad_norm": 0.18642479181289673, "learning_rate": 1e-06, "loss": -0.0806, "step": 1213 }, { "clip_ratio/high_max": 0.0019288598850835115, "clip_ratio/high_mean": 0.000773701693105977, "clip_ratio/low_mean": 0.0005786708120467665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001352372481051134, "epoch": 11.755685131195335, "grad_norm": 0.1861509382724762, "learning_rate": 1e-06, "loss": -0.0241, "step": 1214 }, { "clip_ratio/high_max": 0.00206952536245808, "clip_ratio/high_mean": 0.0008690855538588949, "clip_ratio/low_mean": 0.0006392208856595971, "clip_ratio/low_min": 4.8138637794181705e-05, "clip_ratio/region_mean": 0.0015083064608916175, "epoch": 11.765014577259475, "grad_norm": 0.34649738669395447, "learning_rate": 1e-06, "loss": -0.0732, "step": 1215 }, { "clip_ratio/high_max": 0.0019381642377993558, "clip_ratio/high_mean": 0.0007550556656497065, "clip_ratio/low_mean": 0.0006029759906596155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013580316608567955, "epoch": 11.774344023323614, "grad_norm": 23.721725463867188, "learning_rate": 1e-06, "loss": -0.0395, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0875418526785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 857.3679809570312, "completions/mean_terminated_length": 546.6514282226562, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 11.783673469387756, "frac_reward_zero_std": 0.684151828289032, "grad_norm": 0.21400530636310577, "learning_rate": 1e-06, "loss": -0.0289, "num_tokens": 697833328.0, "reward": 0.6732701063156128, "reward_std": 0.1372433304786682, "rewards/simpleverify_reward/mean": 0.6732701063156128, "rewards/simpleverify_reward/std": 0.4690339267253876, "step": 1217 }, { "clip_ratio/high_max": 0.0017350783600704744, "clip_ratio/high_mean": 0.000673401456879219, "clip_ratio/low_mean": 0.0004032040415040683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010766054947453085, "epoch": 11.793002915451895, "grad_norm": 0.1791912317276001, "learning_rate": 1e-06, "loss": -0.0631, "step": 1218 }, { "clip_ratio/high_max": 0.0017892159485199954, "clip_ratio/high_mean": 0.0006245368131203577, "clip_ratio/low_mean": 0.0004809938009202597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011055306204070803, "epoch": 11.802332361516035, "grad_norm": 0.18060512840747833, "learning_rate": 1e-06, "loss": -0.0425, "step": 1219 }, { "clip_ratio/high_max": 0.0017040536913555115, "clip_ratio/high_mean": 0.0006656395526078995, "clip_ratio/low_mean": 0.0004659974201786099, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011316369855194353, "epoch": 11.811661807580174, "grad_norm": 0.20743513107299805, "learning_rate": 1e-06, "loss": -0.0299, "step": 1220 }, { "clip_ratio/high_max": 0.0021639199621859007, "clip_ratio/high_mean": 0.0008440694909950253, "clip_ratio/low_mean": 0.0004827157683848782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013267852627905086, "epoch": 11.820991253644316, "grad_norm": 0.18194562196731567, "learning_rate": 1e-06, "loss": -0.0875, "step": 1221 }, { "clip_ratio/high_max": 0.0016608118785370607, "clip_ratio/high_mean": 0.0006116412023402518, "clip_ratio/low_mean": 0.0005396412661866634, "clip_ratio/low_min": 1.5078408068802673e-05, "clip_ratio/region_mean": 0.001151282456703484, "epoch": 11.830320699708455, "grad_norm": 0.20806901156902313, "learning_rate": 1e-06, "loss": -0.0633, "step": 1222 }, { "clip_ratio/high_max": 0.0022561110818060115, "clip_ratio/high_mean": 0.0007976540464369464, "clip_ratio/low_mean": 0.0004949860508531856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012926401068398263, "epoch": 11.839650145772595, "grad_norm": 0.1666315495967865, "learning_rate": 1e-06, "loss": -0.0467, "step": 1223 }, { "clip_ratio/high_max": 0.0021539592307817657, "clip_ratio/high_mean": 0.0007506960882892599, "clip_ratio/low_mean": 0.0005438008074634126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001294496931222966, "epoch": 11.848979591836734, "grad_norm": 0.17080296576023102, "learning_rate": 1e-06, "loss": -0.0399, "step": 1224 }, { "clip_ratio/high_max": 0.0017484037816757336, "clip_ratio/high_mean": 0.0006977073062444106, "clip_ratio/low_mean": 0.0007309733791771578, "clip_ratio/low_min": 2.7250925995758735e-05, "clip_ratio/region_mean": 0.0014286807072494412, "epoch": 11.858309037900874, "grad_norm": 0.1808101236820221, "learning_rate": 1e-06, "loss": 0.0131, "step": 1225 }, { "clip_ratio/high_max": 0.002101078433042858, "clip_ratio/high_mean": 0.0008223442291637184, "clip_ratio/low_mean": 0.0008208623276004801, "clip_ratio/low_min": 2.090300949930679e-05, "clip_ratio/region_mean": 0.0016432065676781349, "epoch": 11.867638483965015, "grad_norm": 0.18547481298446655, "learning_rate": 1e-06, "loss": -0.0391, "step": 1226 }, { "clip_ratio/high_max": 0.002333036249183351, "clip_ratio/high_mean": 0.0008265988126368029, "clip_ratio/low_mean": 0.0006712916078868147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014978904000599869, "epoch": 11.876967930029155, "grad_norm": 0.34613555669784546, "learning_rate": 1e-06, "loss": -0.0272, "step": 1227 }, { "clip_ratio/high_max": 0.001981951663765358, "clip_ratio/high_mean": 0.0008465292030450655, "clip_ratio/low_mean": 0.00069602684743586, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015425560050061904, "epoch": 11.886297376093294, "grad_norm": 0.43101322650909424, "learning_rate": 1e-06, "loss": -0.0176, "step": 1228 }, { "clip_ratio/high_max": 0.0020465676207095385, "clip_ratio/high_mean": 0.0008120898201013915, "clip_ratio/low_mean": 0.0006627344591834117, "clip_ratio/low_min": 1.44877139973687e-05, "clip_ratio/region_mean": 0.0014748242538189515, "epoch": 11.895626822157434, "grad_norm": 0.23350341618061066, "learning_rate": 1e-06, "loss": -0.0445, "step": 1229 }, { "clip_ratio/high_max": 0.0020995127451897133, "clip_ratio/high_mean": 0.0007924211731733521, "clip_ratio/low_mean": 0.0007187205301306676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015111417014850304, "epoch": 12.00932944606414, "grad_norm": 0.18322347104549408, "learning_rate": 1e-06, "loss": -0.0173, "step": 1230 }, { "clip_ratio/high_max": 0.0021064992470201105, "clip_ratio/high_mean": 0.0008104395601549186, "clip_ratio/low_mean": 0.000661797052998736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014722365776833612, "epoch": 12.018658892128279, "grad_norm": 0.17127804458141327, "learning_rate": 1e-06, "loss": -0.0397, "step": 1231 }, { "clip_ratio/high_max": 0.0020971320336684585, "clip_ratio/high_mean": 0.0008380976014450425, "clip_ratio/low_mean": 0.0006451907861446671, "clip_ratio/low_min": 1.1156729669892229e-05, "clip_ratio/region_mean": 0.0014832883898634464, "epoch": 12.02798833819242, "grad_norm": 0.1832875907421112, "learning_rate": 1e-06, "loss": -0.0601, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0851004464285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 847.820068359375, "completions/mean_terminated_length": 545.6868286132812, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 12.03731778425656, "frac_reward_zero_std": 0.7053571939468384, "grad_norm": 0.18499420583248138, "learning_rate": 1e-06, "loss": -0.0423, "num_tokens": 706395156.0, "reward": 0.6823381781578064, "reward_std": 0.12846894562244415, "rewards/simpleverify_reward/mean": 0.6823381781578064, "rewards/simpleverify_reward/std": 0.4655833840370178, "step": 1233 }, { "clip_ratio/high_max": 0.0013660860659001628, "clip_ratio/high_mean": 0.000488810018850927, "clip_ratio/low_mean": 0.00037452259039127966, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008633326087874593, "epoch": 12.0466472303207, "grad_norm": 0.18263275921344757, "learning_rate": 1e-06, "loss": -0.0612, "step": 1234 }, { "clip_ratio/high_max": 0.001648705107072601, "clip_ratio/high_mean": 0.0006553950261150021, "clip_ratio/low_mean": 0.0003417546965920337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009971497165679466, "epoch": 12.055976676384839, "grad_norm": 0.21805453300476074, "learning_rate": 1e-06, "loss": -0.064, "step": 1235 }, { "clip_ratio/high_max": 0.0019877051818184555, "clip_ratio/high_mean": 0.000690754931383708, "clip_ratio/low_mean": 0.00033156254767163773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010223174867860507, "epoch": 12.06530612244898, "grad_norm": 0.18564294278621674, "learning_rate": 1e-06, "loss": -0.086, "step": 1236 }, { "clip_ratio/high_max": 0.0025813238316914067, "clip_ratio/high_mean": 0.0008632383905933239, "clip_ratio/low_mean": 0.0003532865102897631, "clip_ratio/low_min": 1.4803410522290505e-05, "clip_ratio/region_mean": 0.001216524851770373, "epoch": 12.07463556851312, "grad_norm": 0.1674908995628357, "learning_rate": 1e-06, "loss": -0.0672, "step": 1237 }, { "clip_ratio/high_max": 0.001713443809421733, "clip_ratio/high_mean": 0.0007101443843566813, "clip_ratio/low_mean": 0.00037144387533771805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001081588266970357, "epoch": 12.08396501457726, "grad_norm": 0.16276459395885468, "learning_rate": 1e-06, "loss": -0.0557, "step": 1238 }, { "clip_ratio/high_max": 0.002133135414624121, "clip_ratio/high_mean": 0.0007403942145174369, "clip_ratio/low_mean": 0.0005760916701547103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013164858646632638, "epoch": 12.093294460641399, "grad_norm": 0.2175583839416504, "learning_rate": 1e-06, "loss": -0.0357, "step": 1239 }, { "clip_ratio/high_max": 0.0016712912220100407, "clip_ratio/high_mean": 0.0006739416048731073, "clip_ratio/low_mean": 0.0005821454869874287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001256087067304179, "epoch": 12.102623906705539, "grad_norm": 0.17243611812591553, "learning_rate": 1e-06, "loss": -0.0473, "step": 1240 }, { "clip_ratio/high_max": 0.0014721443330927286, "clip_ratio/high_mean": 0.0006494599820143776, "clip_ratio/low_mean": 0.0004960904734616634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011455504536570515, "epoch": 12.11195335276968, "grad_norm": 47.8651123046875, "learning_rate": 1e-06, "loss": 0.0118, "step": 1241 }, { "clip_ratio/high_max": 0.0019142473029205576, "clip_ratio/high_mean": 0.0007495986001231358, "clip_ratio/low_mean": 0.00049087731349573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001240475907252403, "epoch": 12.12128279883382, "grad_norm": 0.17776606976985931, "learning_rate": 1e-06, "loss": -0.0532, "step": 1242 }, { "clip_ratio/high_max": 0.0014867958052491304, "clip_ratio/high_mean": 0.0005051594089309219, "clip_ratio/low_mean": 0.0008070458570728078, "clip_ratio/low_min": 1.969744698726572e-05, "clip_ratio/region_mean": 0.0013122052550897934, "epoch": 12.130612244897959, "grad_norm": 0.16309450566768646, "learning_rate": 1e-06, "loss": -0.0146, "step": 1243 }, { "clip_ratio/high_max": 0.0019658166893350426, "clip_ratio/high_mean": 0.0006932121050340356, "clip_ratio/low_mean": 0.0007531641058449168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001446376209059963, "epoch": 12.139941690962099, "grad_norm": 0.3122186064720154, "learning_rate": 1e-06, "loss": 0.0041, "step": 1244 }, { "clip_ratio/high_max": 0.0025209564264514484, "clip_ratio/high_mean": 0.0008778625488048419, "clip_ratio/low_mean": 0.0005517610884453461, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014296236258815043, "epoch": 12.14927113702624, "grad_norm": 0.15574276447296143, "learning_rate": 1e-06, "loss": -0.04, "step": 1245 }, { "clip_ratio/high_max": 0.0021479748356796335, "clip_ratio/high_mean": 0.000849766742248903, "clip_ratio/low_mean": 0.000628763827990042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00147853052476421, "epoch": 12.15860058309038, "grad_norm": 0.1873592734336853, "learning_rate": 1e-06, "loss": -0.0408, "step": 1246 }, { "clip_ratio/high_max": 0.0020712746918434277, "clip_ratio/high_mean": 0.0007462596659024712, "clip_ratio/low_mean": 0.0005662357925757533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013124954639351927, "epoch": 12.167930029154519, "grad_norm": 462.0624084472656, "learning_rate": 1e-06, "loss": 0.4888, "step": 1247 }, { "clip_ratio/high_max": 0.001986936382309068, "clip_ratio/high_mean": 0.000785216302574554, "clip_ratio/low_mean": 0.0006431230613088701, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00142833936115494, "epoch": 12.177259475218658, "grad_norm": 0.18139177560806274, "learning_rate": 1e-06, "loss": -0.0356, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0830775669642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 844.7650146484375, "completions/mean_terminated_length": 550.1875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 12.186588921282798, "frac_reward_zero_std": 0.672433078289032, "grad_norm": 0.22511444985866547, "learning_rate": 1e-06, "loss": -0.0215, "num_tokens": 715048579.0, "reward": 0.6810128688812256, "reward_std": 0.14523962140083313, "rewards/simpleverify_reward/mean": 0.6810128092765808, "rewards/simpleverify_reward/std": 0.4661003351211548, "step": 1249 }, { "clip_ratio/high_max": 0.0016212120153795695, "clip_ratio/high_mean": 0.0005950769573246362, "clip_ratio/low_mean": 0.00040784119300951716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010029181739810156, "epoch": 12.19591836734694, "grad_norm": 0.18443210422992706, "learning_rate": 1e-06, "loss": -0.0384, "step": 1250 }, { "clip_ratio/high_max": 0.0018785231659421697, "clip_ratio/high_mean": 0.0006483120687335031, "clip_ratio/low_mean": 0.0004199825752948527, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010682946522138081, "epoch": 12.205247813411079, "grad_norm": 0.19146081805229187, "learning_rate": 1e-06, "loss": -0.0141, "step": 1251 }, { "clip_ratio/high_max": 0.0021547830183408223, "clip_ratio/high_mean": 0.0007725075047346763, "clip_ratio/low_mean": 0.00044083839247832657, "clip_ratio/low_min": 1.7327418390777893e-05, "clip_ratio/region_mean": 0.0012133458694734145, "epoch": 12.214577259475218, "grad_norm": 0.18842914700508118, "learning_rate": 1e-06, "loss": -0.0776, "step": 1252 }, { "clip_ratio/high_max": 0.002238974513602443, "clip_ratio/high_mean": 0.0009069215957424603, "clip_ratio/low_mean": 0.0004245125337547506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013314341376826633, "epoch": 12.223906705539358, "grad_norm": 0.3680884838104248, "learning_rate": 1e-06, "loss": -0.0832, "step": 1253 }, { "clip_ratio/high_max": 0.0019391473251744173, "clip_ratio/high_mean": 0.000849477828523959, "clip_ratio/low_mean": 0.0004482902695599478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001297768121730769, "epoch": 12.2332361516035, "grad_norm": 0.8947314023971558, "learning_rate": 1e-06, "loss": -0.0641, "step": 1254 }, { "clip_ratio/high_max": 0.0021562756774073932, "clip_ratio/high_mean": 0.0008575365081924247, "clip_ratio/low_mean": 0.0005877524863535655, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014452890100074, "epoch": 12.242565597667639, "grad_norm": 0.203543022274971, "learning_rate": 1e-06, "loss": -0.0143, "step": 1255 }, { "clip_ratio/high_max": 0.002429372056212742, "clip_ratio/high_mean": 0.0009672902415331919, "clip_ratio/low_mean": 0.0006072733726796287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001574563608301105, "epoch": 12.251895043731778, "grad_norm": 0.20951814949512482, "learning_rate": 1e-06, "loss": -0.0577, "step": 1256 }, { "clip_ratio/high_max": 0.0021263568705762736, "clip_ratio/high_mean": 0.0008680798819113988, "clip_ratio/low_mean": 0.0006307773974185693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001498857276601484, "epoch": 12.261224489795918, "grad_norm": 0.18112505972385406, "learning_rate": 1e-06, "loss": -0.0645, "step": 1257 }, { "clip_ratio/high_max": 0.0024317611823789775, "clip_ratio/high_mean": 0.0008778495175647549, "clip_ratio/low_mean": 0.0007025751528999535, "clip_ratio/low_min": 1.5806777810212225e-05, "clip_ratio/region_mean": 0.001580424686835613, "epoch": 12.270553935860057, "grad_norm": 0.2067062258720398, "learning_rate": 1e-06, "loss": -0.0545, "step": 1258 }, { "clip_ratio/high_max": 0.0017886358182295226, "clip_ratio/high_mean": 0.0007343305296672042, "clip_ratio/low_mean": 0.0007935896956041688, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015279202270903625, "epoch": 12.279883381924199, "grad_norm": 0.1783149540424347, "learning_rate": 1e-06, "loss": -0.0177, "step": 1259 }, { "clip_ratio/high_max": 0.002422168690827675, "clip_ratio/high_mean": 0.00092441969900392, "clip_ratio/low_mean": 0.0007074150989865302, "clip_ratio/low_min": 1.9066503227804787e-05, "clip_ratio/region_mean": 0.0016318347843480296, "epoch": 12.289212827988338, "grad_norm": 0.23151107132434845, "learning_rate": 1e-06, "loss": -0.0498, "step": 1260 }, { "clip_ratio/high_max": 0.0022317694892990403, "clip_ratio/high_mean": 0.0009297953474742826, "clip_ratio/low_mean": 0.0006829321928307763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016127275412145536, "epoch": 12.298542274052478, "grad_norm": 0.2865244150161743, "learning_rate": 1e-06, "loss": -0.0692, "step": 1261 }, { "clip_ratio/high_max": 0.0020466257592488546, "clip_ratio/high_mean": 0.0009911066363201826, "clip_ratio/low_mean": 0.0007337418483075453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001724848429148551, "epoch": 12.307871720116617, "grad_norm": 0.17314448952674866, "learning_rate": 1e-06, "loss": -0.0745, "step": 1262 }, { "clip_ratio/high_max": 0.0024659188129589893, "clip_ratio/high_mean": 0.000887392364347761, "clip_ratio/low_mean": 0.0008041871778914356, "clip_ratio/low_min": 3.2224801543634385e-05, "clip_ratio/region_mean": 0.0016915795386012178, "epoch": 12.317201166180759, "grad_norm": 0.19004984200000763, "learning_rate": 1e-06, "loss": -0.0224, "step": 1263 }, { "clip_ratio/high_max": 0.0025069318944588304, "clip_ratio/high_mean": 0.0009603465441614389, "clip_ratio/low_mean": 0.0006634616656810977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016238082680501975, "epoch": 12.326530612244898, "grad_norm": 0.16081392765045166, "learning_rate": 1e-06, "loss": -0.0689, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0853794642857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 843.2838134765625, "completions/mean_terminated_length": 539.6439819335938, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 12.335860058309038, "frac_reward_zero_std": 0.703683078289032, "grad_norm": 0.17850758135318756, "learning_rate": 1e-06, "loss": -0.0219, "num_tokens": 723550055.0, "reward": 0.6745256781578064, "reward_std": 0.12982994318008423, "rewards/simpleverify_reward/mean": 0.6745256781578064, "rewards/simpleverify_reward/std": 0.4685681462287903, "step": 1265 }, { "clip_ratio/high_max": 0.0014517713534587529, "clip_ratio/high_mean": 0.0005351601776055759, "clip_ratio/low_mean": 0.0004069530000379018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000942113169003278, "epoch": 12.345189504373177, "grad_norm": 0.19244447350502014, "learning_rate": 1e-06, "loss": -0.0296, "step": 1266 }, { "clip_ratio/high_max": 0.001664968751356355, "clip_ratio/high_mean": 0.0006206710495462175, "clip_ratio/low_mean": 0.00035489466426952276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009755657119967509, "epoch": 12.354518950437317, "grad_norm": 0.19948484003543854, "learning_rate": 1e-06, "loss": -0.0368, "step": 1267 }, { "clip_ratio/high_max": 0.0019241897243773565, "clip_ratio/high_mean": 0.0006859852237539599, "clip_ratio/low_mean": 0.0004398879591462901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011258732120040804, "epoch": 12.363848396501458, "grad_norm": 0.19461487233638763, "learning_rate": 1e-06, "loss": -0.0246, "step": 1268 }, { "clip_ratio/high_max": 0.00151098293645191, "clip_ratio/high_mean": 0.0005928654663875932, "clip_ratio/low_mean": 0.00034679987311392324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009396653331350535, "epoch": 12.373177842565598, "grad_norm": 0.1673404574394226, "learning_rate": 1e-06, "loss": -0.054, "step": 1269 }, { "clip_ratio/high_max": 0.001964564173249528, "clip_ratio/high_mean": 0.0007279488872882212, "clip_ratio/low_mean": 0.00054842967233526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012763785634888336, "epoch": 12.382507288629737, "grad_norm": 0.23281016945838928, "learning_rate": 1e-06, "loss": -0.0277, "step": 1270 }, { "clip_ratio/high_max": 0.0022475848672911525, "clip_ratio/high_mean": 0.0008043856323638465, "clip_ratio/low_mean": 0.0004615676580215222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012659532840189058, "epoch": 12.391836734693877, "grad_norm": 0.22291173040866852, "learning_rate": 1e-06, "loss": -0.0463, "step": 1271 }, { "clip_ratio/high_max": 0.001995474740397185, "clip_ratio/high_mean": 0.000631604320005863, "clip_ratio/low_mean": 0.00047593826548109064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011075425682065543, "epoch": 12.401166180758018, "grad_norm": 0.18760299682617188, "learning_rate": 1e-06, "loss": -0.0435, "step": 1272 }, { "clip_ratio/high_max": 0.0015624421284883283, "clip_ratio/high_mean": 0.0006718685681335046, "clip_ratio/low_mean": 0.00047948315432222444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011513517074490665, "epoch": 12.410495626822158, "grad_norm": 0.19938701391220093, "learning_rate": 1e-06, "loss": -0.0504, "step": 1273 }, { "clip_ratio/high_max": 0.0024219072511186823, "clip_ratio/high_mean": 0.0008373054479307029, "clip_ratio/low_mean": 0.0005942824282101355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014315878725028597, "epoch": 12.419825072886297, "grad_norm": 0.19185805320739746, "learning_rate": 1e-06, "loss": -0.0724, "step": 1274 }, { "clip_ratio/high_max": 0.0024082592244667467, "clip_ratio/high_mean": 0.0008695133055880433, "clip_ratio/low_mean": 0.000638645905382873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001508159202785464, "epoch": 12.429154518950437, "grad_norm": 0.20501326024532318, "learning_rate": 1e-06, "loss": -0.0216, "step": 1275 }, { "clip_ratio/high_max": 0.0022570032015210018, "clip_ratio/high_mean": 0.0008502471300744219, "clip_ratio/low_mean": 0.0006033729296177626, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014536200869770255, "epoch": 12.438483965014576, "grad_norm": 0.19590596854686737, "learning_rate": 1e-06, "loss": -0.0658, "step": 1276 }, { "clip_ratio/high_max": 0.0019272163335699588, "clip_ratio/high_mean": 0.0007164357266447041, "clip_ratio/low_mean": 0.0006212411008164054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001337676847469993, "epoch": 12.447813411078718, "grad_norm": 0.17627117037773132, "learning_rate": 1e-06, "loss": -0.0162, "step": 1277 }, { "clip_ratio/high_max": 0.0021971431433485122, "clip_ratio/high_mean": 0.0007884639712756325, "clip_ratio/low_mean": 0.000539641670002311, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013281056817504577, "epoch": 12.457142857142857, "grad_norm": 0.19206470251083374, "learning_rate": 1e-06, "loss": -0.0505, "step": 1278 }, { "clip_ratio/high_max": 0.0019924480438930914, "clip_ratio/high_mean": 0.0007465372227670741, "clip_ratio/low_mean": 0.0005273530055092124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012738902260025498, "epoch": 12.466472303206997, "grad_norm": 0.1521359384059906, "learning_rate": 1e-06, "loss": -0.0374, "step": 1279 }, { "clip_ratio/high_max": 0.002187578455050243, "clip_ratio/high_mean": 0.0008908524723665323, "clip_ratio/low_mean": 0.0006226276618690463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015134801105887163, "epoch": 12.475801749271136, "grad_norm": 0.22307853400707245, "learning_rate": 1e-06, "loss": -0.0279, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0924246651785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 877.0380249023438, "completions/mean_terminated_length": 549.2288818359375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 12.485131195335278, "frac_reward_zero_std": 0.6886160969734192, "grad_norm": 0.2933986485004425, "learning_rate": 1e-06, "loss": -0.0699, "num_tokens": 732145816.0, "reward": 0.65966796875, "reward_std": 0.13660022616386414, "rewards/simpleverify_reward/mean": 0.65966796875, "rewards/simpleverify_reward/std": 0.47383731603622437, "step": 1281 }, { "clip_ratio/high_max": 0.001553862670334638, "clip_ratio/high_mean": 0.0005461988130264217, "clip_ratio/low_mean": 0.0003280465198258753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008742453483137069, "epoch": 12.494460641399417, "grad_norm": 0.16881747543811798, "learning_rate": 1e-06, "loss": -0.0147, "step": 1282 }, { "clip_ratio/high_max": 0.0016623562514723744, "clip_ratio/high_mean": 0.0006345246147247963, "clip_ratio/low_mean": 0.00046207835839595646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010966029731207527, "epoch": 12.503790087463557, "grad_norm": 0.2437950074672699, "learning_rate": 1e-06, "loss": -0.0059, "step": 1283 }, { "clip_ratio/high_max": 0.0022196756908670068, "clip_ratio/high_mean": 0.0007566148469777545, "clip_ratio/low_mean": 0.0003717366197406591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001128351483203005, "epoch": 12.513119533527696, "grad_norm": 0.21430031955242157, "learning_rate": 1e-06, "loss": -0.0635, "step": 1284 }, { "clip_ratio/high_max": 0.0018538931180955842, "clip_ratio/high_mean": 0.0006546167387568858, "clip_ratio/low_mean": 0.00041826089545793366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010728776614996605, "epoch": 12.522448979591836, "grad_norm": 0.15151603519916534, "learning_rate": 1e-06, "loss": -0.0468, "step": 1285 }, { "clip_ratio/high_max": 0.001978941545530688, "clip_ratio/high_mean": 0.0008128459912768449, "clip_ratio/low_mean": 0.0004426341074577067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012554801323858555, "epoch": 12.531778425655977, "grad_norm": 0.17239078879356384, "learning_rate": 1e-06, "loss": -0.0616, "step": 1286 }, { "clip_ratio/high_max": 0.002029980532824993, "clip_ratio/high_mean": 0.0007027474102869746, "clip_ratio/low_mean": 0.0004793586113009951, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011821060397778638, "epoch": 12.541107871720117, "grad_norm": 0.2156786173582077, "learning_rate": 1e-06, "loss": -0.0746, "step": 1287 }, { "clip_ratio/high_max": 0.0018668724660528824, "clip_ratio/high_mean": 0.0006686084670946002, "clip_ratio/low_mean": 0.0005503273996509961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012189358712930698, "epoch": 12.550437317784256, "grad_norm": 0.2202305793762207, "learning_rate": 1e-06, "loss": -0.0268, "step": 1288 }, { "clip_ratio/high_max": 0.0019567053896025755, "clip_ratio/high_mean": 0.0007400583817798179, "clip_ratio/low_mean": 0.00040682721157736523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011468855809653178, "epoch": 12.559766763848396, "grad_norm": 0.5505403876304626, "learning_rate": 1e-06, "loss": -0.0648, "step": 1289 }, { "clip_ratio/high_max": 0.001842635447246721, "clip_ratio/high_mean": 0.0006282628601184115, "clip_ratio/low_mean": 0.0005738339355048083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012020967915304936, "epoch": 12.569096209912537, "grad_norm": 0.18688492476940155, "learning_rate": 1e-06, "loss": 0.0061, "step": 1290 }, { "clip_ratio/high_max": 0.0016989778669085354, "clip_ratio/high_mean": 0.0007147474325392977, "clip_ratio/low_mean": 0.0006003432336001424, "clip_ratio/low_min": 1.439428797311848e-05, "clip_ratio/region_mean": 0.0013150906670489348, "epoch": 12.578425655976677, "grad_norm": 0.19543525576591492, "learning_rate": 1e-06, "loss": -0.0427, "step": 1291 }, { "clip_ratio/high_max": 0.002034267316048499, "clip_ratio/high_mean": 0.0008492634970025392, "clip_ratio/low_mean": 0.0007098744808899937, "clip_ratio/low_min": 1.8110693417838775e-05, "clip_ratio/region_mean": 0.0015591379924444482, "epoch": 12.587755102040816, "grad_norm": 0.19996732473373413, "learning_rate": 1e-06, "loss": -0.0221, "step": 1292 }, { "clip_ratio/high_max": 0.002051561350526754, "clip_ratio/high_mean": 0.0008100315699266503, "clip_ratio/low_mean": 0.0007592768142785644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015693083842052147, "epoch": 12.597084548104956, "grad_norm": 0.32189008593559265, "learning_rate": 1e-06, "loss": -0.0068, "step": 1293 }, { "clip_ratio/high_max": 0.002096858384902589, "clip_ratio/high_mean": 0.0007367674543274916, "clip_ratio/low_mean": 0.000531627687450964, "clip_ratio/low_min": 2.37371823459398e-05, "clip_ratio/region_mean": 0.001268395149963908, "epoch": 12.606413994169095, "grad_norm": 0.19886203110218048, "learning_rate": 1e-06, "loss": -0.0513, "step": 1294 }, { "clip_ratio/high_max": 0.0021457276452565566, "clip_ratio/high_mean": 0.0007864580493333051, "clip_ratio/low_mean": 0.0006929837909410708, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014794418311794288, "epoch": 12.615743440233237, "grad_norm": 0.19232389330863953, "learning_rate": 1e-06, "loss": -0.0516, "step": 1295 }, { "clip_ratio/high_max": 0.002114567130774958, "clip_ratio/high_mean": 0.0008644765784993069, "clip_ratio/low_mean": 0.0005679238238371909, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014324003932415508, "epoch": 12.625072886297376, "grad_norm": 0.17667897045612335, "learning_rate": 1e-06, "loss": -0.0812, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0887974330357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 857.4168090820312, "completions/mean_terminated_length": 541.814208984375, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 12.634402332361516, "frac_reward_zero_std": 0.680245578289032, "grad_norm": 0.20789079368114471, "learning_rate": 1e-06, "loss": -0.0669, "num_tokens": 740665439.0, "reward": 0.6918945908546448, "reward_std": 0.1395050585269928, "rewards/simpleverify_reward/mean": 0.69189453125, "rewards/simpleverify_reward/std": 0.46172648668289185, "step": 1297 }, { "clip_ratio/high_max": 0.0018050224462058395, "clip_ratio/high_mean": 0.0007122922561393352, "clip_ratio/low_mean": 0.0003167192121509288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010290114551025908, "epoch": 12.643731778425655, "grad_norm": 0.22109881043434143, "learning_rate": 1e-06, "loss": -0.104, "step": 1298 }, { "clip_ratio/high_max": 0.0019477952700981405, "clip_ratio/high_mean": 0.0006712931972288061, "clip_ratio/low_mean": 0.0004684924915636657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011397856906114612, "epoch": 12.653061224489797, "grad_norm": 0.21071778237819672, "learning_rate": 1e-06, "loss": -0.0079, "step": 1299 }, { "clip_ratio/high_max": 0.0022304194535536226, "clip_ratio/high_mean": 0.0008130201531457715, "clip_ratio/low_mean": 0.0005009483065805398, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013139684815541841, "epoch": 12.662390670553936, "grad_norm": 0.1897917538881302, "learning_rate": 1e-06, "loss": -0.057, "step": 1300 }, { "clip_ratio/high_max": 0.0022618895236519165, "clip_ratio/high_mean": 0.0008512171534675872, "clip_ratio/low_mean": 0.00031566884399580886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011668859879137017, "epoch": 12.671720116618076, "grad_norm": 0.15955333411693573, "learning_rate": 1e-06, "loss": -0.0445, "step": 1301 }, { "clip_ratio/high_max": 0.0017930826616066042, "clip_ratio/high_mean": 0.0006664723532594508, "clip_ratio/low_mean": 0.0004270944796189724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010935668142337818, "epoch": 12.681049562682215, "grad_norm": 1.0878920555114746, "learning_rate": 1e-06, "loss": -0.0461, "step": 1302 }, { "clip_ratio/high_max": 0.0019235741347074509, "clip_ratio/high_mean": 0.0007042183206067421, "clip_ratio/low_mean": 0.000536266517428885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012404848421283532, "epoch": 12.690379008746355, "grad_norm": 0.17001205682754517, "learning_rate": 1e-06, "loss": -0.0642, "step": 1303 }, { "clip_ratio/high_max": 0.0019014153731404804, "clip_ratio/high_mean": 0.0007952415526233381, "clip_ratio/low_mean": 0.0005373340236474178, "clip_ratio/low_min": 4.378544144856278e-05, "clip_ratio/region_mean": 0.0013325755862751976, "epoch": 12.699708454810496, "grad_norm": 0.1880332976579666, "learning_rate": 1e-06, "loss": -0.0544, "step": 1304 }, { "clip_ratio/high_max": 0.0018490986403776333, "clip_ratio/high_mean": 0.0007373326443484984, "clip_ratio/low_mean": 0.0005392052398747182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012765378814947326, "epoch": 12.709037900874636, "grad_norm": 6.3806915283203125, "learning_rate": 1e-06, "loss": -0.0263, "step": 1305 }, { "clip_ratio/high_max": 0.0018855677408282645, "clip_ratio/high_mean": 0.000726059353837627, "clip_ratio/low_mean": 0.0005285393749545619, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012545987447083462, "epoch": 12.718367346938775, "grad_norm": 0.2179526388645172, "learning_rate": 1e-06, "loss": -0.0129, "step": 1306 }, { "clip_ratio/high_max": 0.0019773441836150596, "clip_ratio/high_mean": 0.0007398475663649151, "clip_ratio/low_mean": 0.0005974017744847515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001337249314019573, "epoch": 12.727696793002915, "grad_norm": 0.16485042870044708, "learning_rate": 1e-06, "loss": -0.0348, "step": 1307 }, { "clip_ratio/high_max": 0.0018334581254748628, "clip_ratio/high_mean": 0.0007830732756701764, "clip_ratio/low_mean": 0.0006382422229762597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014213155300240032, "epoch": 12.737026239067056, "grad_norm": 0.21194931864738464, "learning_rate": 1e-06, "loss": -0.0363, "step": 1308 }, { "clip_ratio/high_max": 0.0022234173120523337, "clip_ratio/high_mean": 0.0008709380235814024, "clip_ratio/low_mean": 0.0006635431254835567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015344811363320332, "epoch": 12.746355685131196, "grad_norm": 0.2323385328054428, "learning_rate": 1e-06, "loss": -0.0594, "step": 1309 }, { "clip_ratio/high_max": 0.002550790763052646, "clip_ratio/high_mean": 0.0010000408383348258, "clip_ratio/low_mean": 0.0006212839830368466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016213248200074304, "epoch": 12.755685131195335, "grad_norm": 0.30275416374206543, "learning_rate": 1e-06, "loss": -0.059, "step": 1310 }, { "clip_ratio/high_max": 0.001947471402672818, "clip_ratio/high_mean": 0.0007222098938655108, "clip_ratio/low_mean": 0.0006263620075515064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013485718845913652, "epoch": 12.765014577259475, "grad_norm": 0.15164776146411896, "learning_rate": 1e-06, "loss": -0.0194, "step": 1311 }, { "clip_ratio/high_max": 0.0020302615157561377, "clip_ratio/high_mean": 0.0009159197797998786, "clip_ratio/low_mean": 0.0008374620465474436, "clip_ratio/low_min": 2.7976966521237046e-05, "clip_ratio/region_mean": 0.0017533817881485447, "epoch": 12.774344023323614, "grad_norm": 0.2167823314666748, "learning_rate": 1e-06, "loss": -0.0365, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0812639508928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 830.3453369140625, "completions/mean_terminated_length": 541.4918823242188, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 12.783673469387756, "frac_reward_zero_std": 0.6941964626312256, "grad_norm": 0.20548588037490845, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 749233445.0, "reward": 0.6815011501312256, "reward_std": 0.13402113318443298, "rewards/simpleverify_reward/mean": 0.6815010905265808, "rewards/simpleverify_reward/std": 0.4659103751182556, "step": 1313 }, { "clip_ratio/high_max": 0.001842769965151092, "clip_ratio/high_mean": 0.0006516448956972454, "clip_ratio/low_mean": 0.0003792517190959188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001030896633892553, "epoch": 12.793002915451895, "grad_norm": 0.18886005878448486, "learning_rate": 1e-06, "loss": -0.0316, "step": 1314 }, { "clip_ratio/high_max": 0.0020060121241840534, "clip_ratio/high_mean": 0.0007877415919210762, "clip_ratio/low_mean": 0.0003669939919745957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011547355679795146, "epoch": 12.802332361516035, "grad_norm": 0.19845743477344513, "learning_rate": 1e-06, "loss": -0.0852, "step": 1315 }, { "clip_ratio/high_max": 0.0018286328268004581, "clip_ratio/high_mean": 0.0007401180500892224, "clip_ratio/low_mean": 0.00034334561223658966, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010834636996150948, "epoch": 12.811661807580174, "grad_norm": 0.5848187208175659, "learning_rate": 1e-06, "loss": -0.0424, "step": 1316 }, { "clip_ratio/high_max": 0.0017862427248473978, "clip_ratio/high_mean": 0.0006805673519920674, "clip_ratio/low_mean": 0.0003161121417178947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009966794750653207, "epoch": 12.820991253644316, "grad_norm": 0.17385590076446533, "learning_rate": 1e-06, "loss": -0.0609, "step": 1317 }, { "clip_ratio/high_max": 0.0019365608422958758, "clip_ratio/high_mean": 0.0007171701472543646, "clip_ratio/low_mean": 0.00044769211763195926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011648622712527867, "epoch": 12.830320699708455, "grad_norm": 0.16698431968688965, "learning_rate": 1e-06, "loss": -0.0651, "step": 1318 }, { "clip_ratio/high_max": 0.0017832327430369332, "clip_ratio/high_mean": 0.0007963197567733005, "clip_ratio/low_mean": 0.0006095319658925291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014058517590456177, "epoch": 12.839650145772595, "grad_norm": 0.19403690099716187, "learning_rate": 1e-06, "loss": -0.023, "step": 1319 }, { "clip_ratio/high_max": 0.0018761529645416886, "clip_ratio/high_mean": 0.0007689509347983403, "clip_ratio/low_mean": 0.0005789751521660946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013479260924214032, "epoch": 12.848979591836734, "grad_norm": 1.1981221437454224, "learning_rate": 1e-06, "loss": -0.0348, "step": 1320 }, { "clip_ratio/high_max": 0.002345436681935098, "clip_ratio/high_mean": 0.0009284609004680533, "clip_ratio/low_mean": 0.0007350173646045732, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016634782004985027, "epoch": 12.858309037900874, "grad_norm": 0.21019859611988068, "learning_rate": 1e-06, "loss": -0.0183, "step": 1321 }, { "clip_ratio/high_max": 0.0020147938703303225, "clip_ratio/high_mean": 0.0007537715164289693, "clip_ratio/low_mean": 0.0004522964163697907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012060679400747176, "epoch": 12.867638483965015, "grad_norm": 0.19221578538417816, "learning_rate": 1e-06, "loss": -0.0462, "step": 1322 }, { "clip_ratio/high_max": 0.002354365373321343, "clip_ratio/high_mean": 0.0008665500663482817, "clip_ratio/low_mean": 0.0006423975883080857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015089476328284945, "epoch": 12.876967930029155, "grad_norm": 0.19525384902954102, "learning_rate": 1e-06, "loss": -0.028, "step": 1323 }, { "clip_ratio/high_max": 0.0019264610382379033, "clip_ratio/high_mean": 0.0007888084819569485, "clip_ratio/low_mean": 0.0006622404507652391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014510489490930922, "epoch": 12.886297376093294, "grad_norm": 0.14993880689144135, "learning_rate": 1e-06, "loss": -0.0238, "step": 1324 }, { "clip_ratio/high_max": 0.00236386081087403, "clip_ratio/high_mean": 0.0009141868813458132, "clip_ratio/low_mean": 0.0005991338566673221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015133207598410081, "epoch": 12.895626822157434, "grad_norm": 0.2290256768465042, "learning_rate": 1e-06, "loss": -0.0233, "step": 1325 }, { "clip_ratio/high_max": 0.0020213787065586075, "clip_ratio/high_mean": 0.0008087689438980306, "clip_ratio/low_mean": 0.0006325821309474122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014413511016755365, "epoch": 13.00932944606414, "grad_norm": 0.2462291121482849, "learning_rate": 1e-06, "loss": -0.0287, "step": 1326 }, { "clip_ratio/high_max": 0.002078212106425781, "clip_ratio/high_mean": 0.0009502039592916844, "clip_ratio/low_mean": 0.000811991905720788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017621959013922606, "epoch": 13.018658892128279, "grad_norm": 0.15808609127998352, "learning_rate": 1e-06, "loss": -0.0462, "step": 1327 }, { "clip_ratio/high_max": 0.002286901326442603, "clip_ratio/high_mean": 0.0008178995303751435, "clip_ratio/low_mean": 0.000687358051436604, "clip_ratio/low_min": 1.3688129911315627e-05, "clip_ratio/region_mean": 0.0015052575909066945, "epoch": 13.02798833819242, "grad_norm": 0.20424821972846985, "learning_rate": 1e-06, "loss": -0.0096, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0900530133928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 860.0816650390625, "completions/mean_terminated_length": 539.8385620117188, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 13.03731778425656, "frac_reward_zero_std": 0.6925223469734192, "grad_norm": 0.20466329157352448, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 757706847.0, "reward": 0.6752930283546448, "reward_std": 0.1359356790781021, "rewards/simpleverify_reward/mean": 0.67529296875, "rewards/simpleverify_reward/std": 0.4682815968990326, "step": 1329 }, { "clip_ratio/high_max": 0.0017114565816882532, "clip_ratio/high_mean": 0.0006926083706275676, "clip_ratio/low_mean": 0.000350292497614646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010429008507344406, "epoch": 13.0466472303207, "grad_norm": 0.19678793847560883, "learning_rate": 1e-06, "loss": -0.0529, "step": 1330 }, { "clip_ratio/high_max": 0.0016368978904210962, "clip_ratio/high_mean": 0.0006627846305491403, "clip_ratio/low_mean": 0.00036026951966050547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010230541538476245, "epoch": 13.055976676384839, "grad_norm": 0.17557364702224731, "learning_rate": 1e-06, "loss": -0.0487, "step": 1331 }, { "clip_ratio/high_max": 0.0016518218799319584, "clip_ratio/high_mean": 0.000629877684332314, "clip_ratio/low_mean": 0.0004216616334815626, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010515393150853924, "epoch": 13.06530612244898, "grad_norm": 0.32792195677757263, "learning_rate": 1e-06, "loss": -0.0372, "step": 1332 }, { "clip_ratio/high_max": 0.0016667325216985773, "clip_ratio/high_mean": 0.0006572093243448762, "clip_ratio/low_mean": 0.0003726963686858653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010299057030351833, "epoch": 13.07463556851312, "grad_norm": 0.21191255748271942, "learning_rate": 1e-06, "loss": -0.0269, "step": 1333 }, { "clip_ratio/high_max": 0.001905270131828729, "clip_ratio/high_mean": 0.0006204715627973201, "clip_ratio/low_mean": 0.00038658230732835364, "clip_ratio/low_min": 1.1265320608799811e-05, "clip_ratio/region_mean": 0.0010070538482978009, "epoch": 13.08396501457726, "grad_norm": 0.20675736665725708, "learning_rate": 1e-06, "loss": -0.002, "step": 1334 }, { "clip_ratio/high_max": 0.0018405013033770956, "clip_ratio/high_mean": 0.0007104899050318636, "clip_ratio/low_mean": 0.00046826972538838163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001178759670438012, "epoch": 13.093294460641399, "grad_norm": 0.19805321097373962, "learning_rate": 1e-06, "loss": -0.0155, "step": 1335 }, { "clip_ratio/high_max": 0.0018314524058951065, "clip_ratio/high_mean": 0.000723695486158249, "clip_ratio/low_mean": 0.000409030583796266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001132726072682999, "epoch": 13.102623906705539, "grad_norm": 0.17753343284130096, "learning_rate": 1e-06, "loss": -0.0411, "step": 1336 }, { "clip_ratio/high_max": 0.0019907858877559192, "clip_ratio/high_mean": 0.0008095506000245223, "clip_ratio/low_mean": 0.00046900861661924864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012785592225554865, "epoch": 13.11195335276968, "grad_norm": 0.19523467123508453, "learning_rate": 1e-06, "loss": -0.0474, "step": 1337 }, { "clip_ratio/high_max": 0.0019303306071378756, "clip_ratio/high_mean": 0.0007724703918938758, "clip_ratio/low_mean": 0.0004418903340592806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012143607491452713, "epoch": 13.12128279883382, "grad_norm": 0.2226903736591339, "learning_rate": 1e-06, "loss": -0.1006, "step": 1338 }, { "clip_ratio/high_max": 0.0017630476868362166, "clip_ratio/high_mean": 0.000704325641891046, "clip_ratio/low_mean": 0.0005811097889818484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012854354190494632, "epoch": 13.130612244897959, "grad_norm": 0.18350651860237122, "learning_rate": 1e-06, "loss": -0.0404, "step": 1339 }, { "clip_ratio/high_max": 0.001960410754691111, "clip_ratio/high_mean": 0.0006900240659888368, "clip_ratio/low_mean": 0.0005121564763612696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012021805268886965, "epoch": 13.139941690962099, "grad_norm": 0.18244904279708862, "learning_rate": 1e-06, "loss": -0.0372, "step": 1340 }, { "clip_ratio/high_max": 0.002013174118474126, "clip_ratio/high_mean": 0.0008127215587592218, "clip_ratio/low_mean": 0.00060325165213726, "clip_ratio/low_min": 3.375337473698892e-05, "clip_ratio/region_mean": 0.0014159732236294076, "epoch": 13.14927113702624, "grad_norm": 0.37285706400871277, "learning_rate": 1e-06, "loss": -0.0411, "step": 1341 }, { "clip_ratio/high_max": 0.0017524788636364974, "clip_ratio/high_mean": 0.000776766461058287, "clip_ratio/low_mean": 0.0006055162184566143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013822826877003536, "epoch": 13.15860058309038, "grad_norm": 0.1887950748205185, "learning_rate": 1e-06, "loss": -0.0533, "step": 1342 }, { "clip_ratio/high_max": 0.002297910825291183, "clip_ratio/high_mean": 0.0008510644320267602, "clip_ratio/low_mean": 0.0005853244374520727, "clip_ratio/low_min": 3.1375500839203596e-05, "clip_ratio/region_mean": 0.0014363888549269177, "epoch": 13.167930029154519, "grad_norm": 0.1578933149576187, "learning_rate": 1e-06, "loss": -0.0475, "step": 1343 }, { "clip_ratio/high_max": 0.002204702905146405, "clip_ratio/high_mean": 0.0008340773965755943, "clip_ratio/low_mean": 0.0006284602914092829, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014625376788899302, "epoch": 13.177259475218658, "grad_norm": 0.20534676313400269, "learning_rate": 1e-06, "loss": -0.0624, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0865652901785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 842.159912109375, "completions/mean_terminated_length": 533.7966918945312, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 13.186588921282798, "frac_reward_zero_std": 0.7008928656578064, "grad_norm": 0.18206001818180084, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 766134939.0, "reward": 0.6795480251312256, "reward_std": 0.1296139508485794, "rewards/simpleverify_reward/mean": 0.6795479655265808, "rewards/simpleverify_reward/std": 0.466666579246521, "step": 1345 }, { "clip_ratio/high_max": 0.0016556840237171855, "clip_ratio/high_mean": 0.0006014426144247409, "clip_ratio/low_mean": 0.00039175458459794754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009931972181220772, "epoch": 13.19591836734694, "grad_norm": 0.19763682782649994, "learning_rate": 1e-06, "loss": -0.0432, "step": 1346 }, { "clip_ratio/high_max": 0.0019967887637903914, "clip_ratio/high_mean": 0.000696597800924792, "clip_ratio/low_mean": 0.0004924213271806366, "clip_ratio/low_min": 1.461646388634108e-05, "clip_ratio/region_mean": 0.001189019123557955, "epoch": 13.205247813411079, "grad_norm": 0.2085253745317459, "learning_rate": 1e-06, "loss": -0.0543, "step": 1347 }, { "clip_ratio/high_max": 0.001554494141601026, "clip_ratio/high_mean": 0.0006068989805498859, "clip_ratio/low_mean": 0.0004365037830211804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001043402764480561, "epoch": 13.214577259475218, "grad_norm": 0.16386951506137848, "learning_rate": 1e-06, "loss": -0.0546, "step": 1348 }, { "clip_ratio/high_max": 0.0015665139799239114, "clip_ratio/high_mean": 0.0005109015319249011, "clip_ratio/low_mean": 0.00032869324923012755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000839594766148366, "epoch": 13.223906705539358, "grad_norm": 0.17842639982700348, "learning_rate": 1e-06, "loss": -0.0419, "step": 1349 }, { "clip_ratio/high_max": 0.0018018074770225212, "clip_ratio/high_mean": 0.0006128853060545225, "clip_ratio/low_mean": 0.00041749956380954245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010303848757757805, "epoch": 13.2332361516035, "grad_norm": 0.22414632141590118, "learning_rate": 1e-06, "loss": -0.0232, "step": 1350 }, { "clip_ratio/high_max": 0.001860641463281354, "clip_ratio/high_mean": 0.0006608594021599856, "clip_ratio/low_mean": 0.0005077930100014783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011686523866956122, "epoch": 13.242565597667639, "grad_norm": 0.186594158411026, "learning_rate": 1e-06, "loss": -0.0158, "step": 1351 }, { "clip_ratio/high_max": 0.002209053360274993, "clip_ratio/high_mean": 0.0008672655421833042, "clip_ratio/low_mean": 0.0004207341094115691, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00128799965386861, "epoch": 13.251895043731778, "grad_norm": 0.1997538059949875, "learning_rate": 1e-06, "loss": -0.0753, "step": 1352 }, { "clip_ratio/high_max": 0.002198517704528058, "clip_ratio/high_mean": 0.0006889375908940565, "clip_ratio/low_mean": 0.000452739329830365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001141676944826031, "epoch": 13.261224489795918, "grad_norm": 0.2018834352493286, "learning_rate": 1e-06, "loss": -0.0603, "step": 1353 }, { "clip_ratio/high_max": 0.0019277143437648192, "clip_ratio/high_mean": 0.000644405762614042, "clip_ratio/low_mean": 0.0006449455213441979, "clip_ratio/low_min": 2.797672277665697e-05, "clip_ratio/region_mean": 0.0012893512794107664, "epoch": 13.270553935860057, "grad_norm": 0.20657019317150116, "learning_rate": 1e-06, "loss": -0.0048, "step": 1354 }, { "clip_ratio/high_max": 0.0018908252714027185, "clip_ratio/high_mean": 0.0006713224247505423, "clip_ratio/low_mean": 0.0005752235147156171, "clip_ratio/low_min": 1.147842067439342e-05, "clip_ratio/region_mean": 0.0012465459549275693, "epoch": 13.279883381924199, "grad_norm": 0.2469731569290161, "learning_rate": 1e-06, "loss": -0.0468, "step": 1355 }, { "clip_ratio/high_max": 0.0015967805666150525, "clip_ratio/high_mean": 0.0006311222177828313, "clip_ratio/low_mean": 0.0005005188741051825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011316410855215508, "epoch": 13.289212827988338, "grad_norm": 0.23019935190677643, "learning_rate": 1e-06, "loss": -0.0514, "step": 1356 }, { "clip_ratio/high_max": 0.0022638098744209856, "clip_ratio/high_mean": 0.0008746571347728604, "clip_ratio/low_mean": 0.0004614502415734023, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001336107357928995, "epoch": 13.298542274052478, "grad_norm": 0.23386602103710175, "learning_rate": 1e-06, "loss": -0.0818, "step": 1357 }, { "clip_ratio/high_max": 0.0019541983638191596, "clip_ratio/high_mean": 0.0007459450616806862, "clip_ratio/low_mean": 0.0008139819228745182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015599269390804693, "epoch": 13.307871720116617, "grad_norm": 0.2767294943332672, "learning_rate": 1e-06, "loss": -0.0152, "step": 1358 }, { "clip_ratio/high_max": 0.0021957795543130487, "clip_ratio/high_mean": 0.0007473764144378947, "clip_ratio/low_mean": 0.0007052739988466783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014526503946399316, "epoch": 13.317201166180759, "grad_norm": 0.17445966601371765, "learning_rate": 1e-06, "loss": -0.0208, "step": 1359 }, { "clip_ratio/high_max": 0.001974275117390789, "clip_ratio/high_mean": 0.0007834104580979329, "clip_ratio/low_mean": 0.0006955594949431543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014789699962420855, "epoch": 13.326530612244898, "grad_norm": 0.1825408637523651, "learning_rate": 1e-06, "loss": -0.0326, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.095703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 887.072021484375, "completions/mean_terminated_length": 547.4661865234375, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 13.335860058309038, "frac_reward_zero_std": 0.6886160969734192, "grad_norm": 0.17827986180782318, "learning_rate": 1e-06, "loss": -0.0208, "num_tokens": 774659803.0, "reward": 0.6813616156578064, "reward_std": 0.13553670048713684, "rewards/simpleverify_reward/mean": 0.6813616156578064, "rewards/simpleverify_reward/std": 0.46596473455429077, "step": 1361 }, { "clip_ratio/high_max": 0.0018771966570056975, "clip_ratio/high_mean": 0.0007110922433639644, "clip_ratio/low_mean": 0.0003730745811481029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010841668226930778, "epoch": 13.345189504373177, "grad_norm": 0.19672629237174988, "learning_rate": 1e-06, "loss": -0.0654, "step": 1362 }, { "clip_ratio/high_max": 0.0017459440532547887, "clip_ratio/high_mean": 0.0005902329867240041, "clip_ratio/low_mean": 0.00033062258285099233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009208555857185274, "epoch": 13.354518950437317, "grad_norm": 0.18470989167690277, "learning_rate": 1e-06, "loss": -0.0482, "step": 1363 }, { "clip_ratio/high_max": 0.0018515688097977545, "clip_ratio/high_mean": 0.000696832043104223, "clip_ratio/low_mean": 0.0003295233818789711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010263554395351093, "epoch": 13.363848396501458, "grad_norm": 0.18431846797466278, "learning_rate": 1e-06, "loss": -0.0878, "step": 1364 }, { "clip_ratio/high_max": 0.001883689867099747, "clip_ratio/high_mean": 0.0006843670198577456, "clip_ratio/low_mean": 0.00047673171138740145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011610987112362636, "epoch": 13.373177842565598, "grad_norm": 0.20399200916290283, "learning_rate": 1e-06, "loss": -0.0467, "step": 1365 }, { "clip_ratio/high_max": 0.001973763333808165, "clip_ratio/high_mean": 0.0006505006640509237, "clip_ratio/low_mean": 0.000502481397234078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011529820749274222, "epoch": 13.382507288629737, "grad_norm": 0.23689232766628265, "learning_rate": 1e-06, "loss": -0.0052, "step": 1366 }, { "clip_ratio/high_max": 0.0019088768531219102, "clip_ratio/high_mean": 0.0006811818020651117, "clip_ratio/low_mean": 0.00037763923546663136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010588210388959851, "epoch": 13.391836734693877, "grad_norm": 0.17231902480125427, "learning_rate": 1e-06, "loss": -0.0427, "step": 1367 }, { "clip_ratio/high_max": 0.0020121252819080837, "clip_ratio/high_mean": 0.0006191072743604309, "clip_ratio/low_mean": 0.000550522667253972, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011696299479808658, "epoch": 13.401166180758018, "grad_norm": 0.18811094760894775, "learning_rate": 1e-06, "loss": -0.0225, "step": 1368 }, { "clip_ratio/high_max": 0.0023383412008115556, "clip_ratio/high_mean": 0.0008381554271181813, "clip_ratio/low_mean": 0.0004687783894041786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013069338419882115, "epoch": 13.410495626822158, "grad_norm": 0.16518598794937134, "learning_rate": 1e-06, "loss": -0.085, "step": 1369 }, { "clip_ratio/high_max": 0.002003098765271716, "clip_ratio/high_mean": 0.0007809425096638734, "clip_ratio/low_mean": 0.0004748828259835136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012558253438328393, "epoch": 13.419825072886297, "grad_norm": 0.18337808549404144, "learning_rate": 1e-06, "loss": -0.0499, "step": 1370 }, { "clip_ratio/high_max": 0.0016146569032571279, "clip_ratio/high_mean": 0.0006483035967903561, "clip_ratio/low_mean": 0.0007446501058439026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001392953719914658, "epoch": 13.429154518950437, "grad_norm": 0.1854764074087143, "learning_rate": 1e-06, "loss": -0.0124, "step": 1371 }, { "clip_ratio/high_max": 0.002308105271367822, "clip_ratio/high_mean": 0.000833231000797241, "clip_ratio/low_mean": 0.0005737554929510225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001406986513757147, "epoch": 13.438483965014576, "grad_norm": 0.18932315707206726, "learning_rate": 1e-06, "loss": -0.0133, "step": 1372 }, { "clip_ratio/high_max": 0.0021714463146054186, "clip_ratio/high_mean": 0.000913624793611234, "clip_ratio/low_mean": 0.0006095178714531357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015231426732498221, "epoch": 13.447813411078718, "grad_norm": 0.42570796608924866, "learning_rate": 1e-06, "loss": -0.0779, "step": 1373 }, { "clip_ratio/high_max": 0.002163319495593896, "clip_ratio/high_mean": 0.0008149010081979213, "clip_ratio/low_mean": 0.0006610048644688504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014759059085918125, "epoch": 13.457142857142857, "grad_norm": 0.18759815394878387, "learning_rate": 1e-06, "loss": -0.0599, "step": 1374 }, { "clip_ratio/high_max": 0.0018207975263067055, "clip_ratio/high_mean": 0.0007182547396951122, "clip_ratio/low_mean": 0.0006280955904003349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013463503491948359, "epoch": 13.466472303206997, "grad_norm": 8.677435874938965, "learning_rate": 1e-06, "loss": -0.0556, "step": 1375 }, { "clip_ratio/high_max": 0.0024918547715060413, "clip_ratio/high_mean": 0.0008239339294959791, "clip_ratio/low_mean": 0.0006222846805030713, "clip_ratio/low_min": 1.3864241736882832e-05, "clip_ratio/region_mean": 0.0014462186118180398, "epoch": 13.475801749271136, "grad_norm": 124.77651977539062, "learning_rate": 1e-06, "loss": -0.0467, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0897739955357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 857.284423828125, "completions/mean_terminated_length": 537.8555908203125, "completions/min_length": 98.0, "completions/min_terminated_length": 98.0, "epoch": 13.485131195335278, "frac_reward_zero_std": 0.684151828289032, "grad_norm": 0.2558850049972534, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 783083840.0, "reward": 0.6920340657234192, "reward_std": 0.13650250434875488, "rewards/simpleverify_reward/mean": 0.6920340657234192, "rewards/simpleverify_reward/std": 0.4616684913635254, "step": 1377 }, { "clip_ratio/high_max": 0.0018420344713376835, "clip_ratio/high_mean": 0.0006171226050355472, "clip_ratio/low_mean": 0.00035040737839153735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009675299552327488, "epoch": 13.494460641399417, "grad_norm": 0.18728506565093994, "learning_rate": 1e-06, "loss": -0.0153, "step": 1378 }, { "clip_ratio/high_max": 0.0018534900300437585, "clip_ratio/high_mean": 0.0007363579479715554, "clip_ratio/low_mean": 0.00029911759133938176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010354755613661837, "epoch": 13.503790087463557, "grad_norm": 0.19034084677696228, "learning_rate": 1e-06, "loss": -0.0897, "step": 1379 }, { "clip_ratio/high_max": 0.0021950987247691955, "clip_ratio/high_mean": 0.000868229879415594, "clip_ratio/low_mean": 0.00044377327139955014, "clip_ratio/low_min": 1.7170330465887673e-05, "clip_ratio/region_mean": 0.001312003136263229, "epoch": 13.513119533527696, "grad_norm": 0.36066243052482605, "learning_rate": 1e-06, "loss": -0.0562, "step": 1380 }, { "clip_ratio/high_max": 0.0017894415323098656, "clip_ratio/high_mean": 0.0007393136511382181, "clip_ratio/low_mean": 0.0004941404677083483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012334541061136406, "epoch": 13.522448979591836, "grad_norm": 0.3086172640323639, "learning_rate": 1e-06, "loss": -0.0397, "step": 1381 }, { "clip_ratio/high_max": 0.0018173929274780676, "clip_ratio/high_mean": 0.0006733811442245496, "clip_ratio/low_mean": 0.0005523957206605701, "clip_ratio/low_min": 1.9586335838539526e-05, "clip_ratio/region_mean": 0.0012257768357812893, "epoch": 13.531778425655977, "grad_norm": 0.22384636104106903, "learning_rate": 1e-06, "loss": -0.0387, "step": 1382 }, { "clip_ratio/high_max": 0.0022232008413993753, "clip_ratio/high_mean": 0.0008253825253632385, "clip_ratio/low_mean": 0.0005379586100389133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013633411545015406, "epoch": 13.541107871720117, "grad_norm": 0.19961266219615936, "learning_rate": 1e-06, "loss": -0.0624, "step": 1383 }, { "clip_ratio/high_max": 0.0019505457967170514, "clip_ratio/high_mean": 0.0007870148019719636, "clip_ratio/low_mean": 0.000665418327344014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014524331600114238, "epoch": 13.550437317784256, "grad_norm": 0.19811363518238068, "learning_rate": 1e-06, "loss": -0.0563, "step": 1384 }, { "clip_ratio/high_max": 0.002096112322760746, "clip_ratio/high_mean": 0.00070781026079203, "clip_ratio/low_mean": 0.0005765557089034701, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012843659678765107, "epoch": 13.559766763848396, "grad_norm": 0.2424916923046112, "learning_rate": 1e-06, "loss": 0.0006, "step": 1385 }, { "clip_ratio/high_max": 0.001746782330883434, "clip_ratio/high_mean": 0.0007567597831439343, "clip_ratio/low_mean": 0.0005286131793127424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012853729585913243, "epoch": 13.569096209912537, "grad_norm": 0.16925698518753052, "learning_rate": 1e-06, "loss": -0.0539, "step": 1386 }, { "clip_ratio/high_max": 0.0022242121485760435, "clip_ratio/high_mean": 0.0008703885559953051, "clip_ratio/low_mean": 0.0005720622302760603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014424507462535985, "epoch": 13.578425655976677, "grad_norm": 0.25664591789245605, "learning_rate": 1e-06, "loss": -0.0064, "step": 1387 }, { "clip_ratio/high_max": 0.002603350585559383, "clip_ratio/high_mean": 0.0008959293354564579, "clip_ratio/low_mean": 0.0007155920848163078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016115214457386173, "epoch": 13.587755102040816, "grad_norm": 0.22239696979522705, "learning_rate": 1e-06, "loss": -0.0237, "step": 1388 }, { "clip_ratio/high_max": 0.002196463326981757, "clip_ratio/high_mean": 0.0008739074801269453, "clip_ratio/low_mean": 0.000645370922939037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015192783903330564, "epoch": 13.597084548104956, "grad_norm": 0.16352330148220062, "learning_rate": 1e-06, "loss": -0.0411, "step": 1389 }, { "clip_ratio/high_max": 0.0021533740873564966, "clip_ratio/high_mean": 0.000873193546794937, "clip_ratio/low_mean": 0.0006800360197303235, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001553229561977787, "epoch": 13.606413994169095, "grad_norm": 0.3293848931789398, "learning_rate": 1e-06, "loss": 0.0014, "step": 1390 }, { "clip_ratio/high_max": 0.002316572041308973, "clip_ratio/high_mean": 0.0009006091331684729, "clip_ratio/low_mean": 0.0006857771622890141, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001586386249982752, "epoch": 13.615743440233237, "grad_norm": 0.1799548864364624, "learning_rate": 1e-06, "loss": -0.0388, "step": 1391 }, { "clip_ratio/high_max": 0.0022457336926891003, "clip_ratio/high_mean": 0.0009236720152330236, "clip_ratio/low_mean": 0.0008246359539043624, "clip_ratio/low_min": 2.896200203394983e-05, "clip_ratio/region_mean": 0.0017483079864177853, "epoch": 13.625072886297376, "grad_norm": 0.2576116919517517, "learning_rate": 1e-06, "loss": -0.028, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0887974330357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 851.4766235351562, "completions/mean_terminated_length": 535.2951049804688, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 13.634402332361516, "frac_reward_zero_std": 0.7031250596046448, "grad_norm": 0.2661203145980835, "learning_rate": 1e-06, "loss": -0.0657, "num_tokens": 791543672.0, "reward": 0.6782227158546448, "reward_std": 0.12945175170898438, "rewards/simpleverify_reward/mean": 0.67822265625, "rewards/simpleverify_reward/std": 0.46717438101768494, "step": 1393 }, { "clip_ratio/high_max": 0.001726782134937821, "clip_ratio/high_mean": 0.0005982644715913921, "clip_ratio/low_mean": 0.0003843649337795796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009826294080994558, "epoch": 13.643731778425655, "grad_norm": 0.1838368922472, "learning_rate": 1e-06, "loss": -0.0442, "step": 1394 }, { "clip_ratio/high_max": 0.0016233746864600107, "clip_ratio/high_mean": 0.0006169249663798837, "clip_ratio/low_mean": 0.0003516446786306915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009685696568340063, "epoch": 13.653061224489797, "grad_norm": 0.19173717498779297, "learning_rate": 1e-06, "loss": -0.0403, "step": 1395 }, { "clip_ratio/high_max": 0.001790873739082599, "clip_ratio/high_mean": 0.0006623777253480512, "clip_ratio/low_mean": 0.00047928389358276036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001141661614383338, "epoch": 13.662390670553936, "grad_norm": 0.21622005105018616, "learning_rate": 1e-06, "loss": -0.0189, "step": 1396 }, { "clip_ratio/high_max": 0.0021918448437645566, "clip_ratio/high_mean": 0.00077125911047915, "clip_ratio/low_mean": 0.0004314951293054037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012027542288706172, "epoch": 13.671720116618076, "grad_norm": 0.1767806112766266, "learning_rate": 1e-06, "loss": -0.021, "step": 1397 }, { "clip_ratio/high_max": 0.002138713316526264, "clip_ratio/high_mean": 0.0007550313821411692, "clip_ratio/low_mean": 0.0005269123203106574, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012819437288271729, "epoch": 13.681049562682215, "grad_norm": 0.20593160390853882, "learning_rate": 1e-06, "loss": 0.0219, "step": 1398 }, { "clip_ratio/high_max": 0.0017885358247440308, "clip_ratio/high_mean": 0.0007708968205406563, "clip_ratio/low_mean": 0.0003902042685695051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011611010777414776, "epoch": 13.690379008746355, "grad_norm": 0.17368993163108826, "learning_rate": 1e-06, "loss": -0.0478, "step": 1399 }, { "clip_ratio/high_max": 0.0020661865928559564, "clip_ratio/high_mean": 0.0007511044241255149, "clip_ratio/low_mean": 0.00038841308878545533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011395175206416752, "epoch": 13.699708454810496, "grad_norm": 0.2607992887496948, "learning_rate": 1e-06, "loss": -0.0468, "step": 1400 }, { "clip_ratio/high_max": 0.0020573064393829554, "clip_ratio/high_mean": 0.0007715860338066705, "clip_ratio/low_mean": 0.0004949780322931474, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012665640897466801, "epoch": 13.709037900874636, "grad_norm": 0.24999189376831055, "learning_rate": 1e-06, "loss": -0.0443, "step": 1401 }, { "clip_ratio/high_max": 0.002010807700571604, "clip_ratio/high_mean": 0.0006931375501153525, "clip_ratio/low_mean": 0.00031222947768583253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010053670484921895, "epoch": 13.718367346938775, "grad_norm": 0.17488498985767365, "learning_rate": 1e-06, "loss": -0.0787, "step": 1402 }, { "clip_ratio/high_max": 0.0017515971230750438, "clip_ratio/high_mean": 0.0007169433974922867, "clip_ratio/low_mean": 0.0005682648866240925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012852082909375895, "epoch": 13.727696793002915, "grad_norm": 0.1932821273803711, "learning_rate": 1e-06, "loss": -0.039, "step": 1403 }, { "clip_ratio/high_max": 0.0023224768810905516, "clip_ratio/high_mean": 0.0007893129004514776, "clip_ratio/low_mean": 0.0005942108264207491, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013835236968589015, "epoch": 13.737026239067056, "grad_norm": 0.19541144371032715, "learning_rate": 1e-06, "loss": -0.0369, "step": 1404 }, { "clip_ratio/high_max": 0.0019246608717367053, "clip_ratio/high_mean": 0.0007820594091754174, "clip_ratio/low_mean": 0.0005020139669795753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012840733870689292, "epoch": 13.746355685131196, "grad_norm": 0.2198079228401184, "learning_rate": 1e-06, "loss": -0.0472, "step": 1405 }, { "clip_ratio/high_max": 0.0019087051223323215, "clip_ratio/high_mean": 0.0006694439471175428, "clip_ratio/low_mean": 0.0006827560810052091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001352200037217699, "epoch": 13.755685131195335, "grad_norm": 0.19371594488620758, "learning_rate": 1e-06, "loss": -0.0265, "step": 1406 }, { "clip_ratio/high_max": 0.0021799889800604433, "clip_ratio/high_mean": 0.0008853858671500348, "clip_ratio/low_mean": 0.0005321165785971971, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014175024371070322, "epoch": 13.765014577259475, "grad_norm": 0.415070503950119, "learning_rate": 1e-06, "loss": -0.0276, "step": 1407 }, { "clip_ratio/high_max": 0.0021506094199139625, "clip_ratio/high_mean": 0.0007683547883061692, "clip_ratio/low_mean": 0.00040851662788554677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011768714284698945, "epoch": 13.774344023323614, "grad_norm": 0.15816786885261536, "learning_rate": 1e-06, "loss": -0.057, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0862165178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4027.0, "completions/mean_length": 848.202880859375, "completions/mean_terminated_length": 541.7694702148438, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 13.783673469387756, "frac_reward_zero_std": 0.6925223469734192, "grad_norm": 0.22124432027339935, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 800077428.0, "reward": 0.6740373969078064, "reward_std": 0.13188321888446808, "rewards/simpleverify_reward/mean": 0.6740373969078064, "rewards/simpleverify_reward/std": 0.4687497317790985, "step": 1409 }, { "clip_ratio/high_max": 0.001947058830410242, "clip_ratio/high_mean": 0.000681197949234047, "clip_ratio/low_mean": 0.0003564051598914375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010376031023042742, "epoch": 13.793002915451895, "grad_norm": 0.18066559731960297, "learning_rate": 1e-06, "loss": -0.0327, "step": 1410 }, { "clip_ratio/high_max": 0.002069079091597814, "clip_ratio/high_mean": 0.0007820590417395579, "clip_ratio/low_mean": 0.0003273061802246957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011093652246927377, "epoch": 13.802332361516035, "grad_norm": 0.16822905838489532, "learning_rate": 1e-06, "loss": -0.0289, "step": 1411 }, { "clip_ratio/high_max": 0.002052855197689496, "clip_ratio/high_mean": 0.0007084141725499649, "clip_ratio/low_mean": 0.0004367773112790019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011451914870121982, "epoch": 13.811661807580174, "grad_norm": 0.18910935521125793, "learning_rate": 1e-06, "loss": -0.0123, "step": 1412 }, { "clip_ratio/high_max": 0.001497191628004657, "clip_ratio/high_mean": 0.0006247097953746561, "clip_ratio/low_mean": 0.00049944660941037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001124156391597353, "epoch": 13.820991253644316, "grad_norm": 0.1753934919834137, "learning_rate": 1e-06, "loss": -0.0318, "step": 1413 }, { "clip_ratio/high_max": 0.0019512373328325339, "clip_ratio/high_mean": 0.0006782146620025742, "clip_ratio/low_mean": 0.00042470992457310786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011029245833924506, "epoch": 13.830320699708455, "grad_norm": 0.19961194694042206, "learning_rate": 1e-06, "loss": -0.0352, "step": 1414 }, { "clip_ratio/high_max": 0.0019277659557701554, "clip_ratio/high_mean": 0.0006895365004311316, "clip_ratio/low_mean": 0.0004838972417928744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001173433705844218, "epoch": 13.839650145772595, "grad_norm": 0.6055962443351746, "learning_rate": 1e-06, "loss": -0.0368, "step": 1415 }, { "clip_ratio/high_max": 0.0018232822840218432, "clip_ratio/high_mean": 0.0007213578155642608, "clip_ratio/low_mean": 0.0004787103353010025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001200068149046274, "epoch": 13.848979591836734, "grad_norm": 5.244761943817139, "learning_rate": 1e-06, "loss": -0.0486, "step": 1416 }, { "clip_ratio/high_max": 0.0018815018847817555, "clip_ratio/high_mean": 0.0008326008919539163, "clip_ratio/low_mean": 0.0006301960265773232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00146279693217366, "epoch": 13.858309037900874, "grad_norm": 0.25791963934898376, "learning_rate": 1e-06, "loss": -0.0221, "step": 1417 }, { "clip_ratio/high_max": 0.0017911772811203264, "clip_ratio/high_mean": 0.0006353265825964627, "clip_ratio/low_mean": 0.0004255555063537031, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00106088211396127, "epoch": 13.867638483965015, "grad_norm": 0.19523832201957703, "learning_rate": 1e-06, "loss": -0.0382, "step": 1418 }, { "clip_ratio/high_max": 0.0020433491590665653, "clip_ratio/high_mean": 0.0007078491071297321, "clip_ratio/low_mean": 0.0006967105673538754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014045596653886605, "epoch": 13.876967930029155, "grad_norm": 0.23308667540550232, "learning_rate": 1e-06, "loss": -0.0052, "step": 1419 }, { "clip_ratio/high_max": 0.0023139757686294615, "clip_ratio/high_mean": 0.0008805477500573033, "clip_ratio/low_mean": 0.0005325123304373847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014130600866337772, "epoch": 13.886297376093294, "grad_norm": 0.24983802437782288, "learning_rate": 1e-06, "loss": -0.06, "step": 1420 }, { "clip_ratio/high_max": 0.00198719068794162, "clip_ratio/high_mean": 0.0008013458109417115, "clip_ratio/low_mean": 0.0006900955349919968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014914413768565282, "epoch": 13.895626822157434, "grad_norm": 0.20356178283691406, "learning_rate": 1e-06, "loss": -0.0634, "step": 1421 }, { "clip_ratio/high_max": 0.0019110981047560927, "clip_ratio/high_mean": 0.0007937273012430524, "clip_ratio/low_mean": 0.0006920478808751795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014857751903036842, "epoch": 14.00932944606414, "grad_norm": 0.18858779966831207, "learning_rate": 1e-06, "loss": -0.0445, "step": 1422 }, { "clip_ratio/high_max": 0.0023262222021003254, "clip_ratio/high_mean": 0.0008728227530809818, "clip_ratio/low_mean": 0.0006736976665706607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015465203759958968, "epoch": 14.018658892128279, "grad_norm": 0.1979292780160904, "learning_rate": 1e-06, "loss": -0.0643, "step": 1423 }, { "clip_ratio/high_max": 0.0018310836458113045, "clip_ratio/high_mean": 0.0007760999205856933, "clip_ratio/low_mean": 0.00068665827075165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014627581731474493, "epoch": 14.02798833819242, "grad_norm": 0.19531936943531036, "learning_rate": 1e-06, "loss": -0.0228, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0911690848214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 858.0884399414062, "completions/mean_terminated_length": 533.2782592773438, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 14.03731778425656, "frac_reward_zero_std": 0.6936384439468384, "grad_norm": 0.2336753010749817, "learning_rate": 1e-06, "loss": -0.0292, "num_tokens": 808466055.0, "reward": 0.6879883408546448, "reward_std": 0.13458441197872162, "rewards/simpleverify_reward/mean": 0.68798828125, "rewards/simpleverify_reward/std": 0.4633307456970215, "step": 1425 }, { "clip_ratio/high_max": 0.0018024773635261226, "clip_ratio/high_mean": 0.0005839511095473426, "clip_ratio/low_mean": 0.0003313824972792645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009153335831797449, "epoch": 14.0466472303207, "grad_norm": 0.1913863569498062, "learning_rate": 1e-06, "loss": -0.0323, "step": 1426 }, { "clip_ratio/high_max": 0.0017235264785995241, "clip_ratio/high_mean": 0.0005563536942645442, "clip_ratio/low_mean": 0.0004411325789988041, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009974862659873907, "epoch": 14.055976676384839, "grad_norm": 0.8144093155860901, "learning_rate": 1e-06, "loss": 0.0032, "step": 1427 }, { "clip_ratio/high_max": 0.0019478076392260846, "clip_ratio/high_mean": 0.0007525836954300757, "clip_ratio/low_mean": 0.0003370292110957962, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010896128915192094, "epoch": 14.06530612244898, "grad_norm": 0.1697111427783966, "learning_rate": 1e-06, "loss": -0.1035, "step": 1428 }, { "clip_ratio/high_max": 0.0016983126661216374, "clip_ratio/high_mean": 0.0006528668709506746, "clip_ratio/low_mean": 0.00046980518732198107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011226720453123562, "epoch": 14.07463556851312, "grad_norm": 0.2407538890838623, "learning_rate": 1e-06, "loss": -0.0328, "step": 1429 }, { "clip_ratio/high_max": 0.0017502341142972, "clip_ratio/high_mean": 0.0007114781783457147, "clip_ratio/low_mean": 0.0004872427416557912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011987208999926224, "epoch": 14.08396501457726, "grad_norm": 0.6314725875854492, "learning_rate": 1e-06, "loss": -0.0585, "step": 1430 }, { "clip_ratio/high_max": 0.001897391543025151, "clip_ratio/high_mean": 0.0008374894969165325, "clip_ratio/low_mean": 0.0004173700172032113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012548595186672173, "epoch": 14.093294460641399, "grad_norm": 0.17496764659881592, "learning_rate": 1e-06, "loss": -0.0933, "step": 1431 }, { "clip_ratio/high_max": 0.002231113887319225, "clip_ratio/high_mean": 0.0008738246015127515, "clip_ratio/low_mean": 0.0004037829362459888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012776075418514665, "epoch": 14.102623906705539, "grad_norm": 0.20045115053653717, "learning_rate": 1e-06, "loss": -0.0723, "step": 1432 }, { "clip_ratio/high_max": 0.002093799710564781, "clip_ratio/high_mean": 0.0007480307240257389, "clip_ratio/low_mean": 0.000526084844750585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012741155478579458, "epoch": 14.11195335276968, "grad_norm": 0.200862854719162, "learning_rate": 1e-06, "loss": -0.0668, "step": 1433 }, { "clip_ratio/high_max": 0.0021731104898208287, "clip_ratio/high_mean": 0.000817226698018203, "clip_ratio/low_mean": 0.0004447158789844252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012619425724551547, "epoch": 14.12128279883382, "grad_norm": 0.16003991663455963, "learning_rate": 1e-06, "loss": -0.0663, "step": 1434 }, { "clip_ratio/high_max": 0.002377500095462892, "clip_ratio/high_mean": 0.0007874807561165653, "clip_ratio/low_mean": 0.0007688583518756786, "clip_ratio/low_min": 2.4930195650085807e-05, "clip_ratio/region_mean": 0.0015563391098112334, "epoch": 14.130612244897959, "grad_norm": 0.5989610552787781, "learning_rate": 1e-06, "loss": -0.0404, "step": 1435 }, { "clip_ratio/high_max": 0.0022689742800139356, "clip_ratio/high_mean": 0.0008419295954809058, "clip_ratio/low_mean": 0.0005149019198142923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013568314752774313, "epoch": 14.139941690962099, "grad_norm": 0.2528591752052307, "learning_rate": 1e-06, "loss": -0.039, "step": 1436 }, { "clip_ratio/high_max": 0.0024284263854497112, "clip_ratio/high_mean": 0.0008745589548198041, "clip_ratio/low_mean": 0.0007284145594894653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016029735234042164, "epoch": 14.14927113702624, "grad_norm": 0.173873633146286, "learning_rate": 1e-06, "loss": -0.018, "step": 1437 }, { "clip_ratio/high_max": 0.0020335266963229515, "clip_ratio/high_mean": 0.0008029279451875482, "clip_ratio/low_mean": 0.0005861036897840677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013890316295146476, "epoch": 14.15860058309038, "grad_norm": 0.16440361738204956, "learning_rate": 1e-06, "loss": -0.0368, "step": 1438 }, { "clip_ratio/high_max": 0.002567806812294293, "clip_ratio/high_mean": 0.0009201654356729705, "clip_ratio/low_mean": 0.0007668237612961093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016869891915121116, "epoch": 14.167930029154519, "grad_norm": 0.37390100955963135, "learning_rate": 1e-06, "loss": -0.0433, "step": 1439 }, { "clip_ratio/high_max": 0.0022097222463344224, "clip_ratio/high_mean": 0.0008789401799731422, "clip_ratio/low_mean": 0.0008024200251384173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016813602233014535, "epoch": 14.177259475218658, "grad_norm": 0.17662931978702545, "learning_rate": 1e-06, "loss": -0.0293, "step": 1440 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0855887276785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 839.576416015625, "completions/mean_terminated_length": 534.7756958007812, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 14.186588921282798, "frac_reward_zero_std": 0.6852678656578064, "grad_norm": 0.22853459417819977, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 816892998.0, "reward": 0.6811524033546448, "reward_std": 0.1364784836769104, "rewards/simpleverify_reward/mean": 0.68115234375, "rewards/simpleverify_reward/std": 0.4660460948944092, "step": 1441 }, { "clip_ratio/high_max": 0.001636913904803805, "clip_ratio/high_mean": 0.0006388185774994781, "clip_ratio/low_mean": 0.00038437889111264667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00102319744109991, "epoch": 14.19591836734694, "grad_norm": 0.23393473029136658, "learning_rate": 1e-06, "loss": -0.0517, "step": 1442 }, { "clip_ratio/high_max": 0.0018763126063277014, "clip_ratio/high_mean": 0.0006536056707773241, "clip_ratio/low_mean": 0.000420151141952374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010737568263721187, "epoch": 14.205247813411079, "grad_norm": 0.14857742190361023, "learning_rate": 1e-06, "loss": -0.0433, "step": 1443 }, { "clip_ratio/high_max": 0.0016658159001963213, "clip_ratio/high_mean": 0.0005681124803231796, "clip_ratio/low_mean": 0.00043058410483354237, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009986965742427856, "epoch": 14.214577259475218, "grad_norm": 0.1751215010881424, "learning_rate": 1e-06, "loss": -0.0421, "step": 1444 }, { "clip_ratio/high_max": 0.0016592844986007549, "clip_ratio/high_mean": 0.0006511535666504642, "clip_ratio/low_mean": 0.0003755112857106724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010266648641845677, "epoch": 14.223906705539358, "grad_norm": 0.20355239510536194, "learning_rate": 1e-06, "loss": -0.0439, "step": 1445 }, { "clip_ratio/high_max": 0.0018718850806180853, "clip_ratio/high_mean": 0.0007245479837365565, "clip_ratio/low_mean": 0.0005283369182507158, "clip_ratio/low_min": 2.7448397304397076e-05, "clip_ratio/region_mean": 0.0012528849074442405, "epoch": 14.2332361516035, "grad_norm": 0.19260956346988678, "learning_rate": 1e-06, "loss": -0.0709, "step": 1446 }, { "clip_ratio/high_max": 0.001955975385499187, "clip_ratio/high_mean": 0.0007771363052597735, "clip_ratio/low_mean": 0.0003860049005197652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001163141194410855, "epoch": 14.242565597667639, "grad_norm": 0.17409080266952515, "learning_rate": 1e-06, "loss": -0.0479, "step": 1447 }, { "clip_ratio/high_max": 0.002171129501221003, "clip_ratio/high_mean": 0.000733781183953397, "clip_ratio/low_mean": 0.0004959609559591627, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012297421271796338, "epoch": 14.251895043731778, "grad_norm": 0.23010940849781036, "learning_rate": 1e-06, "loss": -0.0634, "step": 1448 }, { "clip_ratio/high_max": 0.002001169094000943, "clip_ratio/high_mean": 0.0007219899798656115, "clip_ratio/low_mean": 0.0004736500932267518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011956400758208474, "epoch": 14.261224489795918, "grad_norm": 0.19281303882598877, "learning_rate": 1e-06, "loss": -0.0801, "step": 1449 }, { "clip_ratio/high_max": 0.0015444350374309579, "clip_ratio/high_mean": 0.0005883815820197924, "clip_ratio/low_mean": 0.0006475092959590256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00123589082613762, "epoch": 14.270553935860057, "grad_norm": 0.2029615044593811, "learning_rate": 1e-06, "loss": -0.0085, "step": 1450 }, { "clip_ratio/high_max": 0.0019900363877241034, "clip_ratio/high_mean": 0.0006671679911960382, "clip_ratio/low_mean": 0.0005634928675135598, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012306608732615132, "epoch": 14.279883381924199, "grad_norm": 0.24181339144706726, "learning_rate": 1e-06, "loss": -0.0109, "step": 1451 }, { "clip_ratio/high_max": 0.0021695214527426288, "clip_ratio/high_mean": 0.000893157979589887, "clip_ratio/low_mean": 0.0005565108431255794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014496688345388975, "epoch": 14.289212827988338, "grad_norm": 0.18149995803833008, "learning_rate": 1e-06, "loss": -0.0513, "step": 1452 }, { "clip_ratio/high_max": 0.0020423382811713964, "clip_ratio/high_mean": 0.0008601462177466601, "clip_ratio/low_mean": 0.0005887255219931831, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014488717788481154, "epoch": 14.298542274052478, "grad_norm": 0.22083869576454163, "learning_rate": 1e-06, "loss": -0.0492, "step": 1453 }, { "clip_ratio/high_max": 0.002583890403911937, "clip_ratio/high_mean": 0.001047742713126354, "clip_ratio/low_mean": 0.0005312302264428581, "clip_ratio/low_min": 1.907232217490673e-05, "clip_ratio/region_mean": 0.001578972955030622, "epoch": 14.307871720116617, "grad_norm": 0.19940228760242462, "learning_rate": 1e-06, "loss": -0.0733, "step": 1454 }, { "clip_ratio/high_max": 0.002029270661296323, "clip_ratio/high_mean": 0.0007921130363683915, "clip_ratio/low_mean": 0.0006799722841606126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014720853287144564, "epoch": 14.317201166180759, "grad_norm": 0.21839632093906403, "learning_rate": 1e-06, "loss": -0.0274, "step": 1455 }, { "clip_ratio/high_max": 0.0020451589225558564, "clip_ratio/high_mean": 0.0007512713909818558, "clip_ratio/low_mean": 0.0006776586196792778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014289300161181018, "epoch": 14.326530612244898, "grad_norm": 0.24854592978954315, "learning_rate": 1e-06, "loss": -0.0279, "step": 1456 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0828683035714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 822.8195190429688, "completions/mean_terminated_length": 527.068115234375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 14.335860058309038, "frac_reward_zero_std": 0.7098214626312256, "grad_norm": 0.18021522462368011, "learning_rate": 1e-06, "loss": -0.0452, "num_tokens": 825244842.0, "reward": 0.691964328289032, "reward_std": 0.12733787298202515, "rewards/simpleverify_reward/mean": 0.6919642686843872, "rewards/simpleverify_reward/std": 0.4616974890232086, "step": 1457 }, { "clip_ratio/high_max": 0.0014149772141536232, "clip_ratio/high_mean": 0.0005173773870410514, "clip_ratio/low_mean": 0.0003160787921387964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008334561807714636, "epoch": 14.345189504373177, "grad_norm": 0.20603474974632263, "learning_rate": 1e-06, "loss": -0.0361, "step": 1458 }, { "clip_ratio/high_max": 0.0016508529770362657, "clip_ratio/high_mean": 0.0005668551766575547, "clip_ratio/low_mean": 0.00034072801952333975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009075831767404452, "epoch": 14.354518950437317, "grad_norm": 0.20423346757888794, "learning_rate": 1e-06, "loss": -0.0347, "step": 1459 }, { "clip_ratio/high_max": 0.0018177807251049671, "clip_ratio/high_mean": 0.0006821450224379078, "clip_ratio/low_mean": 0.0003578706327971304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010400156788819004, "epoch": 14.363848396501458, "grad_norm": 0.23302273452281952, "learning_rate": 1e-06, "loss": -0.039, "step": 1460 }, { "clip_ratio/high_max": 0.0019665780091600027, "clip_ratio/high_mean": 0.0007355689358519157, "clip_ratio/low_mean": 0.0002777845056698425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00101335343606479, "epoch": 14.373177842565598, "grad_norm": 0.18124867975711823, "learning_rate": 1e-06, "loss": -0.068, "step": 1461 }, { "clip_ratio/high_max": 0.0018849416483135428, "clip_ratio/high_mean": 0.0007333572848438052, "clip_ratio/low_mean": 0.0004817177596123656, "clip_ratio/low_min": 1.749230250425171e-05, "clip_ratio/region_mean": 0.001215075022628298, "epoch": 14.382507288629737, "grad_norm": 0.8104054927825928, "learning_rate": 1e-06, "loss": 0.0005, "step": 1462 }, { "clip_ratio/high_max": 0.0017596285106264986, "clip_ratio/high_mean": 0.0007541623672295827, "clip_ratio/low_mean": 0.0005478616158143268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013020239712204784, "epoch": 14.391836734693877, "grad_norm": 0.19162164628505707, "learning_rate": 1e-06, "loss": -0.0256, "step": 1463 }, { "clip_ratio/high_max": 0.0015064723702380434, "clip_ratio/high_mean": 0.0005690353928002878, "clip_ratio/low_mean": 0.00043191611621296033, "clip_ratio/low_min": 1.5136836736928672e-05, "clip_ratio/region_mean": 0.0010009515208366793, "epoch": 14.401166180758018, "grad_norm": 0.2283337265253067, "learning_rate": 1e-06, "loss": 0.0194, "step": 1464 }, { "clip_ratio/high_max": 0.0017707100487314165, "clip_ratio/high_mean": 0.000647191549433046, "clip_ratio/low_mean": 0.00041040676569537027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001057598316037911, "epoch": 14.410495626822158, "grad_norm": 0.38590529561042786, "learning_rate": 1e-06, "loss": -0.0462, "step": 1465 }, { "clip_ratio/high_max": 0.0021227372635621578, "clip_ratio/high_mean": 0.0007548919857072178, "clip_ratio/low_mean": 0.0004441111714186263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001199003181682201, "epoch": 14.419825072886297, "grad_norm": 0.19625268876552582, "learning_rate": 1e-06, "loss": -0.059, "step": 1466 }, { "clip_ratio/high_max": 0.0020138848485657945, "clip_ratio/high_mean": 0.0006925085344846593, "clip_ratio/low_mean": 0.00042835628210013965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011208648174942937, "epoch": 14.429154518950437, "grad_norm": 0.2553805112838745, "learning_rate": 1e-06, "loss": -0.0474, "step": 1467 }, { "clip_ratio/high_max": 0.0018813544083968736, "clip_ratio/high_mean": 0.0006453354762925301, "clip_ratio/low_mean": 0.00045832788418920245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011036633550247643, "epoch": 14.438483965014576, "grad_norm": 0.16663505136966705, "learning_rate": 1e-06, "loss": -0.0548, "step": 1468 }, { "clip_ratio/high_max": 0.0021450254098454025, "clip_ratio/high_mean": 0.0007167880885390332, "clip_ratio/low_mean": 0.0006239533395273611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013407414116954897, "epoch": 14.447813411078718, "grad_norm": 0.18853157758712769, "learning_rate": 1e-06, "loss": -0.0064, "step": 1469 }, { "clip_ratio/high_max": 0.0022429019154515117, "clip_ratio/high_mean": 0.000807767997685005, "clip_ratio/low_mean": 0.0005962328061741573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014040008172742091, "epoch": 14.457142857142857, "grad_norm": 0.2987304627895355, "learning_rate": 1e-06, "loss": -0.0459, "step": 1470 }, { "clip_ratio/high_max": 0.002276386927405838, "clip_ratio/high_mean": 0.0008552510407753289, "clip_ratio/low_mean": 0.0005922376367379911, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014474886702373624, "epoch": 14.466472303206997, "grad_norm": 0.2037055641412735, "learning_rate": 1e-06, "loss": -0.0071, "step": 1471 }, { "clip_ratio/high_max": 0.0020816856012970675, "clip_ratio/high_mean": 0.000739793788852694, "clip_ratio/low_mean": 0.0006818989149905974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014216927156667225, "epoch": 14.475801749271136, "grad_norm": 0.2072845995426178, "learning_rate": 1e-06, "loss": -0.0007, "step": 1472 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0895647321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 871.2481689453125, "completions/mean_terminated_length": 554.0108032226562, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 14.485131195335278, "frac_reward_zero_std": 0.6964285969734192, "grad_norm": 0.20866422355175018, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 833896359.0, "reward": 0.6810826063156128, "reward_std": 0.1319391429424286, "rewards/simpleverify_reward/mean": 0.6810826063156128, "rewards/simpleverify_reward/std": 0.466073215007782, "step": 1473 }, { "clip_ratio/high_max": 0.0013939139389549382, "clip_ratio/high_mean": 0.0005406692876022134, "clip_ratio/low_mean": 0.0004105483544663002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009512176420685137, "epoch": 14.494460641399417, "grad_norm": 0.19112075865268707, "learning_rate": 1e-06, "loss": 0.0016, "step": 1474 }, { "clip_ratio/high_max": 0.0016099342465167865, "clip_ratio/high_mean": 0.0005592638863163302, "clip_ratio/low_mean": 0.0004469252153285197, "clip_ratio/low_min": 1.3023546671320219e-05, "clip_ratio/region_mean": 0.00100618908618344, "epoch": 14.503790087463557, "grad_norm": 0.2254648506641388, "learning_rate": 1e-06, "loss": 0.0162, "step": 1475 }, { "clip_ratio/high_max": 0.0018178365535277408, "clip_ratio/high_mean": 0.0007036296410660725, "clip_ratio/low_mean": 0.00048121848885784857, "clip_ratio/low_min": 2.2147412892081775e-05, "clip_ratio/region_mean": 0.0011848481626657303, "epoch": 14.513119533527696, "grad_norm": 0.28473883867263794, "learning_rate": 1e-06, "loss": -0.0632, "step": 1476 }, { "clip_ratio/high_max": 0.0017887598587549292, "clip_ratio/high_mean": 0.0006433891885535559, "clip_ratio/low_mean": 0.00038123163858472253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001024620869429782, "epoch": 14.522448979591836, "grad_norm": 0.17629823088645935, "learning_rate": 1e-06, "loss": -0.0694, "step": 1477 }, { "clip_ratio/high_max": 0.0018633121908351313, "clip_ratio/high_mean": 0.0006821527167630848, "clip_ratio/low_mean": 0.00043059256222477416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011127452962682582, "epoch": 14.531778425655977, "grad_norm": 0.1892654001712799, "learning_rate": 1e-06, "loss": -0.0531, "step": 1478 }, { "clip_ratio/high_max": 0.001555675124109257, "clip_ratio/high_mean": 0.0005544316700252239, "clip_ratio/low_mean": 0.0004057939695485402, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009602256195648806, "epoch": 14.541107871720117, "grad_norm": 0.2866798937320709, "learning_rate": 1e-06, "loss": -0.0272, "step": 1479 }, { "clip_ratio/high_max": 0.0019222931587137282, "clip_ratio/high_mean": 0.0007509213774028467, "clip_ratio/low_mean": 0.0004584818848343275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001209403319080593, "epoch": 14.550437317784256, "grad_norm": 0.1761849820613861, "learning_rate": 1e-06, "loss": -0.0404, "step": 1480 }, { "clip_ratio/high_max": 0.001781230115739163, "clip_ratio/high_mean": 0.0006334352183330338, "clip_ratio/low_mean": 0.0004468384063329722, "clip_ratio/low_min": 9.944312296283897e-06, "clip_ratio/region_mean": 0.0010802736178447958, "epoch": 14.559766763848396, "grad_norm": 0.2440309226512909, "learning_rate": 1e-06, "loss": -0.0235, "step": 1481 }, { "clip_ratio/high_max": 0.0016129588293551933, "clip_ratio/high_mean": 0.0006216432520886883, "clip_ratio/low_mean": 0.0005515457214642083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011731889899238013, "epoch": 14.569096209912537, "grad_norm": 0.17802369594573975, "learning_rate": 1e-06, "loss": -0.0327, "step": 1482 }, { "clip_ratio/high_max": 0.001977751628146507, "clip_ratio/high_mean": 0.0008022939255170058, "clip_ratio/low_mean": 0.0003803842787419853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011826781810668763, "epoch": 14.578425655976677, "grad_norm": 2.1266276836395264, "learning_rate": 1e-06, "loss": -0.0899, "step": 1483 }, { "clip_ratio/high_max": 0.0020685641866293736, "clip_ratio/high_mean": 0.0006943590469745686, "clip_ratio/low_mean": 0.0005415973296294396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012359563916106708, "epoch": 14.587755102040816, "grad_norm": 0.18481308221817017, "learning_rate": 1e-06, "loss": -0.0328, "step": 1484 }, { "clip_ratio/high_max": 0.001951387217559386, "clip_ratio/high_mean": 0.0007830593058315571, "clip_ratio/low_mean": 0.000615379187365761, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013984385477670003, "epoch": 14.597084548104956, "grad_norm": 0.4970831871032715, "learning_rate": 1e-06, "loss": -0.0307, "step": 1485 }, { "clip_ratio/high_max": 0.00250672445690725, "clip_ratio/high_mean": 0.0009905835941026453, "clip_ratio/low_mean": 0.00041212388623534935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014027075085323304, "epoch": 14.606413994169095, "grad_norm": 0.20617042481899261, "learning_rate": 1e-06, "loss": -0.1188, "step": 1486 }, { "clip_ratio/high_max": 0.0023025107147987, "clip_ratio/high_mean": 0.0008518614758941112, "clip_ratio/low_mean": 0.0005937619198448374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014456234021054115, "epoch": 14.615743440233237, "grad_norm": 0.24923710525035858, "learning_rate": 1e-06, "loss": -0.0432, "step": 1487 }, { "clip_ratio/high_max": 0.002356052129471209, "clip_ratio/high_mean": 0.0009068300314538646, "clip_ratio/low_mean": 0.0005853701995874871, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001492200211941963, "epoch": 14.625072886297376, "grad_norm": 0.170623317360878, "learning_rate": 1e-06, "loss": -0.0569, "step": 1488 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4011.0, "completions/mean_length": 847.0534057617188, "completions/mean_terminated_length": 537.2516479492188, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 14.634402332361516, "frac_reward_zero_std": 0.7142857313156128, "grad_norm": 0.20812608301639557, "learning_rate": 1e-06, "loss": -0.0605, "num_tokens": 842362044.0, "reward": 0.688058078289032, "reward_std": 0.12424561381340027, "rewards/simpleverify_reward/mean": 0.6880580186843872, "rewards/simpleverify_reward/std": 0.4633024334907532, "step": 1489 }, { "clip_ratio/high_max": 0.0017352480535919312, "clip_ratio/high_mean": 0.0005948200468992582, "clip_ratio/low_mean": 0.00034153214369325724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000936352189455647, "epoch": 14.643731778425655, "grad_norm": 0.18909695744514465, "learning_rate": 1e-06, "loss": -0.0786, "step": 1490 }, { "clip_ratio/high_max": 0.0017432473032386042, "clip_ratio/high_mean": 0.0005919375735174981, "clip_ratio/low_mean": 0.00045978389334777603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001051721497788094, "epoch": 14.653061224489797, "grad_norm": 0.20759959518909454, "learning_rate": 1e-06, "loss": 0.0068, "step": 1491 }, { "clip_ratio/high_max": 0.001856827650044579, "clip_ratio/high_mean": 0.0006824282336310716, "clip_ratio/low_mean": 0.00039421129167749314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010766395134851336, "epoch": 14.662390670553936, "grad_norm": 1.3672367334365845, "learning_rate": 1e-06, "loss": -0.0404, "step": 1492 }, { "clip_ratio/high_max": 0.0018382960261078551, "clip_ratio/high_mean": 0.0007044558442430571, "clip_ratio/low_mean": 0.00045584048780256126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001160296316811582, "epoch": 14.671720116618076, "grad_norm": 0.21125027537345886, "learning_rate": 1e-06, "loss": -0.0244, "step": 1493 }, { "clip_ratio/high_max": 0.0018837383831851184, "clip_ratio/high_mean": 0.0006326035254460294, "clip_ratio/low_mean": 0.0004544978680769418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010871014019357972, "epoch": 14.681049562682215, "grad_norm": 0.17241013050079346, "learning_rate": 1e-06, "loss": -0.0354, "step": 1494 }, { "clip_ratio/high_max": 0.0017040628154063597, "clip_ratio/high_mean": 0.0005663011588694644, "clip_ratio/low_mean": 0.0004593524445226649, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010256536243105074, "epoch": 14.690379008746355, "grad_norm": 0.4345332086086273, "learning_rate": 1e-06, "loss": -0.0311, "step": 1495 }, { "clip_ratio/high_max": 0.0017825818649725989, "clip_ratio/high_mean": 0.0006591110377485165, "clip_ratio/low_mean": 0.0005546512868477294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012137623234593775, "epoch": 14.699708454810496, "grad_norm": 0.20765557885169983, "learning_rate": 1e-06, "loss": -0.0347, "step": 1496 }, { "clip_ratio/high_max": 0.002219303001766093, "clip_ratio/high_mean": 0.0007308155400096439, "clip_ratio/low_mean": 0.0004701162534956893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012009317870251834, "epoch": 14.709037900874636, "grad_norm": 0.16625691950321198, "learning_rate": 1e-06, "loss": -0.046, "step": 1497 }, { "clip_ratio/high_max": 0.0021293425306794234, "clip_ratio/high_mean": 0.0006573044547621976, "clip_ratio/low_mean": 0.0005822867865390435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012395912090141792, "epoch": 14.718367346938775, "grad_norm": 0.17763440310955048, "learning_rate": 1e-06, "loss": -0.0569, "step": 1498 }, { "clip_ratio/high_max": 0.00233986306557199, "clip_ratio/high_mean": 0.0008385883993469179, "clip_ratio/low_mean": 0.0005517769434391084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013903653634770308, "epoch": 14.727696793002915, "grad_norm": 0.2089751809835434, "learning_rate": 1e-06, "loss": -0.0455, "step": 1499 }, { "clip_ratio/high_max": 0.002051780335023068, "clip_ratio/high_mean": 0.0008154932693287265, "clip_ratio/low_mean": 0.0006219465135473001, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014374397760548163, "epoch": 14.737026239067056, "grad_norm": 0.22098791599273682, "learning_rate": 1e-06, "loss": -0.0397, "step": 1500 }, { "clip_ratio/high_max": 0.0016778328390500974, "clip_ratio/high_mean": 0.0006789958806621144, "clip_ratio/low_mean": 0.0006463228501161211, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013253187644295394, "epoch": 14.746355685131196, "grad_norm": 0.1667434573173523, "learning_rate": 1e-06, "loss": -0.0149, "step": 1501 }, { "clip_ratio/high_max": 0.0017593391821719706, "clip_ratio/high_mean": 0.0006534998392453417, "clip_ratio/low_mean": 0.0006295956090980326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012830954747187207, "epoch": 14.755685131195335, "grad_norm": 0.16894058883190155, "learning_rate": 1e-06, "loss": -0.0121, "step": 1502 }, { "clip_ratio/high_max": 0.0019552148296497762, "clip_ratio/high_mean": 0.0007690822440054035, "clip_ratio/low_mean": 0.0005750506161348312, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013441328737826552, "epoch": 14.765014577259475, "grad_norm": 0.39967238903045654, "learning_rate": 1e-06, "loss": -0.0476, "step": 1503 }, { "clip_ratio/high_max": 0.0019298869920021389, "clip_ratio/high_mean": 0.0007113573592505418, "clip_ratio/low_mean": 0.0005865240964340046, "clip_ratio/low_min": 1.709518619463779e-05, "clip_ratio/region_mean": 0.0012978814229427371, "epoch": 14.774344023323614, "grad_norm": 0.18085499107837677, "learning_rate": 1e-06, "loss": -0.0545, "step": 1504 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0830078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 823.7890014648438, "completions/mean_terminated_length": 527.5824584960938, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 14.783673469387756, "frac_reward_zero_std": 0.7120535969734192, "grad_norm": 0.22972293198108673, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 850742187.0, "reward": 0.6833147406578064, "reward_std": 0.12597832083702087, "rewards/simpleverify_reward/mean": 0.6833147406578064, "rewards/simpleverify_reward/std": 0.4651997685432434, "step": 1505 }, { "clip_ratio/high_max": 0.0013229057913122233, "clip_ratio/high_mean": 0.0005157997111382429, "clip_ratio/low_mean": 0.00041183774078490387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009276374548790045, "epoch": 14.793002915451895, "grad_norm": 0.19693727791309357, "learning_rate": 1e-06, "loss": 0.0012, "step": 1506 }, { "clip_ratio/high_max": 0.0016892652020032983, "clip_ratio/high_mean": 0.0006063389901100891, "clip_ratio/low_mean": 0.0003415998708078405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009479388663748978, "epoch": 14.802332361516035, "grad_norm": 0.22609585523605347, "learning_rate": 1e-06, "loss": -0.032, "step": 1507 }, { "clip_ratio/high_max": 0.0016604134580120444, "clip_ratio/high_mean": 0.0006075989040255081, "clip_ratio/low_mean": 0.00041913132736226544, "clip_ratio/low_min": 1.4367816220328677e-05, "clip_ratio/region_mean": 0.0010267302277497947, "epoch": 14.811661807580174, "grad_norm": 0.20292508602142334, "learning_rate": 1e-06, "loss": -0.0112, "step": 1508 }, { "clip_ratio/high_max": 0.002114912564138649, "clip_ratio/high_mean": 0.00070740146747994, "clip_ratio/low_mean": 0.0005189004550629761, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012263018907106016, "epoch": 14.820991253644316, "grad_norm": 0.19955530762672424, "learning_rate": 1e-06, "loss": -0.0334, "step": 1509 }, { "clip_ratio/high_max": 0.0019007637674803846, "clip_ratio/high_mean": 0.0008103164800559171, "clip_ratio/low_mean": 0.0004215151870994305, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012318316476012114, "epoch": 14.830320699708455, "grad_norm": 0.19874800741672516, "learning_rate": 1e-06, "loss": -0.0601, "step": 1510 }, { "clip_ratio/high_max": 0.0018786236250889488, "clip_ratio/high_mean": 0.0007427738628393854, "clip_ratio/low_mean": 0.0004103237577055552, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011530976189533249, "epoch": 14.839650145772595, "grad_norm": 0.18748806416988373, "learning_rate": 1e-06, "loss": -0.0823, "step": 1511 }, { "clip_ratio/high_max": 0.0016389785414503422, "clip_ratio/high_mean": 0.0006411163635675621, "clip_ratio/low_mean": 0.00042682751518441364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010679438564693555, "epoch": 14.848979591836734, "grad_norm": 0.1504000425338745, "learning_rate": 1e-06, "loss": -0.0587, "step": 1512 }, { "clip_ratio/high_max": 0.0017157568072434515, "clip_ratio/high_mean": 0.0007115294001778238, "clip_ratio/low_mean": 0.0006356660360324895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001347195455309702, "epoch": 14.858309037900874, "grad_norm": 0.2405155748128891, "learning_rate": 1e-06, "loss": -0.0398, "step": 1513 }, { "clip_ratio/high_max": 0.0018918941968877334, "clip_ratio/high_mean": 0.0006983533403399633, "clip_ratio/low_mean": 0.0005208725769989542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012192259109724546, "epoch": 14.867638483965015, "grad_norm": 0.22409887611865997, "learning_rate": 1e-06, "loss": -0.003, "step": 1514 }, { "clip_ratio/high_max": 0.0019245099101681262, "clip_ratio/high_mean": 0.0007059572308207862, "clip_ratio/low_mean": 0.0006228073661986855, "clip_ratio/low_min": 5.1152665037079714e-05, "clip_ratio/region_mean": 0.001328764605204924, "epoch": 14.876967930029155, "grad_norm": 0.15444381535053253, "learning_rate": 1e-06, "loss": -0.0373, "step": 1515 }, { "clip_ratio/high_max": 0.0016752060946600977, "clip_ratio/high_mean": 0.000696099637025327, "clip_ratio/low_mean": 0.0005193916226744477, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012154912947153207, "epoch": 14.886297376093294, "grad_norm": 0.155255526304245, "learning_rate": 1e-06, "loss": -0.0437, "step": 1516 }, { "clip_ratio/high_max": 0.0020866779959760606, "clip_ratio/high_mean": 0.0007626133447047323, "clip_ratio/low_mean": 0.0004972266422100802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012598399698617868, "epoch": 14.895626822157434, "grad_norm": 0.37451237440109253, "learning_rate": 1e-06, "loss": -0.0171, "step": 1517 }, { "epoch": 14.895626822157434, "step": 1517, "total_flos": 0.0, "train_loss": -0.016174088740447498, "train_runtime": 46045.8921, "train_samples_per_second": 31.134, "train_steps_per_second": 0.035 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 850742187, "num_train_epochs": 15, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }