{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.73724117818606, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3119.0, "completions/mean_length": 606.5625, "completions/mean_terminated_length": 535.0250854492188, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.0023330417031204435, "grad_norm": 0.139825239777565, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 556256.0, "reward": 0.5424107313156128, "reward_std": 0.24488291144371033, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 1 }, { "clip_ratio/high_max": 0.0022986409749137238, "clip_ratio/high_mean": 0.0010865736549021676, "clip_ratio/low_mean": 0.0006070504496165086, "clip_ratio/low_min": 3.928235310013406e-05, "clip_ratio/region_mean": 0.0016936241154326126, "epoch": 0.004666083406240887, "grad_norm": 0.13357040286064148, "learning_rate": 1e-06, "loss": -0.0052, "step": 2 }, { "clip_ratio/high_max": 0.002638528043462429, "clip_ratio/high_mean": 0.0011571372342586983, "clip_ratio/low_mean": 0.0006701715410599718, "clip_ratio/low_min": 7.16715467206086e-05, "clip_ratio/region_mean": 0.00182730880624149, "epoch": 0.00699912510936133, "grad_norm": 0.12927649915218353, "learning_rate": 1e-06, "loss": -0.0053, "step": 3 }, { "clip_ratio/high_max": 0.002463254946633242, "clip_ratio/high_mean": 0.0010911843601206783, "clip_ratio/low_mean": 0.0006269629561757029, "clip_ratio/low_min": 5.3640959777112585e-05, "clip_ratio/region_mean": 0.0017181472649099305, "epoch": 0.009332166812481774, "grad_norm": 0.13325555622577667, "learning_rate": 1e-06, "loss": -0.0053, "step": 4 }, { "clip_ratio/high_max": 0.002511580994905671, "clip_ratio/high_mean": 0.0010959685314446688, "clip_ratio/low_mean": 0.0007915860005596187, "clip_ratio/low_min": 5.303173657011939e-05, "clip_ratio/region_mean": 0.0018875545429182239, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2607.0, "completions/mean_length": 590.552490234375, "completions/mean_terminated_length": 554.9841918945312, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.011665208515602217, "grad_norm": 0.1198158785700798, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 1135783.0, "reward": 0.4832589626312256, "reward_std": 0.2370942384004593, "rewards/verify_math_reward/mean": 0.4832589328289032, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 5 }, { "clip_ratio/high_max": 0.0023428853455698118, "clip_ratio/high_mean": 0.0009512929136690218, "clip_ratio/low_mean": 0.0005539788216992747, "clip_ratio/low_min": 3.876696337101748e-05, "clip_ratio/region_mean": 0.0015052717571961693, "epoch": 0.01399825021872266, "grad_norm": 0.12222807109355927, "learning_rate": 1e-06, "loss": 0.0036, "step": 6 }, { "clip_ratio/high_max": 0.0023241781964316033, "clip_ratio/high_mean": 0.0010337102903577033, "clip_ratio/low_mean": 0.0005319816227711271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001565691884025, "epoch": 0.016331291921843103, "grad_norm": 0.1279895007610321, "learning_rate": 1e-06, "loss": 0.0036, "step": 7 }, { "clip_ratio/high_max": 0.002724015103012789, "clip_ratio/high_mean": 0.0011034075978386682, "clip_ratio/low_mean": 0.0006094714681239566, "clip_ratio/low_min": 2.932166989921825e-05, "clip_ratio/region_mean": 0.0017128790204878896, "epoch": 0.018664333624963548, "grad_norm": 0.12145639955997467, "learning_rate": 1e-06, "loss": 0.0036, "step": 8 }, { "clip_ratio/high_max": 0.0023459133226424456, "clip_ratio/high_mean": 0.0009634899925003992, "clip_ratio/low_mean": 0.0006493575965578202, "clip_ratio/low_min": 1.1285662367299665e-05, "clip_ratio/region_mean": 0.0016128476345329545, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 581.685302734375, "completions/mean_terminated_length": 538.0045166015625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "epoch": 0.02099737532808399, "grad_norm": 0.11929132789373398, "learning_rate": 1e-06, "loss": 0.0097, "num_tokens": 1710461.0, "reward": 0.5290178656578064, "reward_std": 0.21602025628089905, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 9 }, { "clip_ratio/high_max": 0.00253376059117727, "clip_ratio/high_mean": 0.0010959464925690554, "clip_ratio/low_mean": 0.0009177182091661962, "clip_ratio/low_min": 6.172536632220726e-05, "clip_ratio/region_mean": 0.0020136647290200926, "epoch": 0.023330417031204434, "grad_norm": 0.11741513758897781, "learning_rate": 1e-06, "loss": 0.0096, "step": 10 }, { "clip_ratio/high_max": 0.0021746221245848574, "clip_ratio/high_mean": 0.0009437728404009249, "clip_ratio/low_mean": 0.0006222142146725673, "clip_ratio/low_min": 3.810313319263514e-05, "clip_ratio/region_mean": 0.0015659870696254075, "epoch": 0.025663458734324875, "grad_norm": 0.12099823355674744, "learning_rate": 1e-06, "loss": 0.0096, "step": 11 }, { "clip_ratio/high_max": 0.002284212321683299, "clip_ratio/high_mean": 0.0009622946818126366, "clip_ratio/low_mean": 0.0006222454194357852, "clip_ratio/low_min": 3.2773188650025986e-05, "clip_ratio/region_mean": 0.0015845401139813475, "epoch": 0.02799650043744532, "grad_norm": 0.12343762069940567, "learning_rate": 1e-06, "loss": 0.0096, "step": 12 }, { "clip_ratio/high_max": 0.002133540387148969, "clip_ratio/high_mean": 0.0008776721952017397, "clip_ratio/low_mean": 0.0004960685146215837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013737407025473658, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 587.7221069335938, "completions/mean_terminated_length": 536.0713500976562, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.03032954214056576, "grad_norm": 0.1237315759062767, "learning_rate": 1e-06, "loss": -0.008, "num_tokens": 2263244.0, "reward": 0.6004464626312256, "reward_std": 0.19208844006061554, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 13 }, { "clip_ratio/high_max": 0.002390783491136972, "clip_ratio/high_mean": 0.0009201841166941449, "clip_ratio/low_mean": 0.0005904453855691827, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015106295177247375, "epoch": 0.032662583843686206, "grad_norm": 0.1241251677274704, "learning_rate": 1e-06, "loss": -0.0081, "step": 14 }, { "clip_ratio/high_max": 0.00265116490481887, "clip_ratio/high_mean": 0.0010003927927755285, "clip_ratio/low_mean": 0.0005856096286152024, "clip_ratio/low_min": 1.5866971807554364e-05, "clip_ratio/region_mean": 0.0015860024504945613, "epoch": 0.03499562554680665, "grad_norm": 0.1218414381146431, "learning_rate": 1e-06, "loss": -0.0081, "step": 15 }, { "clip_ratio/high_max": 0.002377994002017658, "clip_ratio/high_mean": 0.0008695639698999003, "clip_ratio/low_mean": 0.0005416954845713917, "clip_ratio/low_min": 1.2747297660098411e-05, "clip_ratio/region_mean": 0.0014112594762991648, "epoch": 0.037328667249927096, "grad_norm": 0.12389780580997467, "learning_rate": 1e-06, "loss": -0.0081, "step": 16 }, { "clip_ratio/high_max": 0.0024106820128508843, "clip_ratio/high_mean": 0.0009968766898964532, "clip_ratio/low_mean": 0.0006063756100047613, "clip_ratio/low_min": 1.5118529518076684e-05, "clip_ratio/region_mean": 0.0016032523271860555, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2943.0, "completions/mean_length": 569.2154541015625, "completions/mean_terminated_length": 545.4393310546875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.039661708953047534, "grad_norm": 0.12719471752643585, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 2832253.0, "reward": 0.5446428656578064, "reward_std": 0.2242865115404129, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 17 }, { "clip_ratio/high_max": 0.0025036680308403447, "clip_ratio/high_mean": 0.0010271042483509518, "clip_ratio/low_mean": 0.0006537355602631578, "clip_ratio/low_min": 2.4149921955540776e-05, "clip_ratio/region_mean": 0.0016808397995191626, "epoch": 0.04199475065616798, "grad_norm": 0.12676291167736053, "learning_rate": 1e-06, "loss": 0.0134, "step": 18 }, { "clip_ratio/high_max": 0.0027959811995970085, "clip_ratio/high_mean": 0.0011196248633495998, "clip_ratio/low_mean": 0.000717350643753889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018369754980085418, "epoch": 0.04432779235928842, "grad_norm": 0.12739385664463043, "learning_rate": 1e-06, "loss": 0.0133, "step": 19 }, { "clip_ratio/high_max": 0.0029148588073439896, "clip_ratio/high_mean": 0.001127249219280202, "clip_ratio/low_mean": 0.0007854968207539059, "clip_ratio/low_min": 6.304252019617707e-05, "clip_ratio/region_mean": 0.0019127460473100655, "epoch": 0.04666083406240887, "grad_norm": 0.12382540851831436, "learning_rate": 1e-06, "loss": 0.0132, "step": 20 }, { "clip_ratio/high_max": 0.0019592746248235926, "clip_ratio/high_mean": 0.0008812815285637043, "clip_ratio/low_mean": 0.0007547873065050226, "clip_ratio/low_min": 7.309357533813454e-05, "clip_ratio/region_mean": 0.0016360688241547905, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3173.0, "completions/mean_length": 622.6663208007812, "completions/mean_terminated_length": 559.5147705078125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.048993875765529306, "grad_norm": 0.13165442645549774, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 3415050.0, "reward": 0.5245535969734192, "reward_std": 0.24107471108436584, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 21 }, { "clip_ratio/high_max": 0.0024175399157684296, "clip_ratio/high_mean": 0.0009496703860349953, "clip_ratio/low_mean": 0.00075606488280755, "clip_ratio/low_min": 5.7472104344924446e-05, "clip_ratio/region_mean": 0.0017057352742995135, "epoch": 0.05132691746864975, "grad_norm": 0.13229161500930786, "learning_rate": 1e-06, "loss": -0.0024, "step": 22 }, { "clip_ratio/high_max": 0.0022421761314035393, "clip_ratio/high_mean": 0.0009404981101397425, "clip_ratio/low_mean": 0.0007850584315747255, "clip_ratio/low_min": 6.583995582332136e-05, "clip_ratio/region_mean": 0.0017255565471714363, "epoch": 0.053659959171770195, "grad_norm": 0.12714257836341858, "learning_rate": 1e-06, "loss": -0.0026, "step": 23 }, { "clip_ratio/high_max": 0.002484742122760508, "clip_ratio/high_mean": 0.0010370668096584268, "clip_ratio/low_mean": 0.0007963930911500938, "clip_ratio/low_min": 6.353132994263433e-05, "clip_ratio/region_mean": 0.0018334599080844782, "epoch": 0.05599300087489064, "grad_norm": 0.13083310425281525, "learning_rate": 1e-06, "loss": -0.0026, "step": 24 }, { "clip_ratio/high_max": 0.002489036211045459, "clip_ratio/high_mean": 0.0010009248289861716, "clip_ratio/low_mean": 0.0006285366598604014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016294614688376896, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 580.6439819335938, "completions/mean_terminated_length": 536.9503173828125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.058326042578011085, "grad_norm": 0.12958918511867523, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 3988707.0, "reward": 0.5892857313156128, "reward_std": 0.20095311105251312, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 25 }, { "clip_ratio/high_max": 0.0024675652748555876, "clip_ratio/high_mean": 0.0009995967520808335, "clip_ratio/low_mean": 0.0006661116531176958, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001665708430664381, "epoch": 0.06065908428113152, "grad_norm": 0.12610486149787903, "learning_rate": 1e-06, "loss": 0.0056, "step": 26 }, { "clip_ratio/high_max": 0.002277686129673384, "clip_ratio/high_mean": 0.0009735188032209408, "clip_ratio/low_mean": 0.0006767354534531478, "clip_ratio/low_min": 3.548448148649186e-05, "clip_ratio/region_mean": 0.001650254249398131, "epoch": 0.06299212598425197, "grad_norm": 0.1267174482345581, "learning_rate": 1e-06, "loss": 0.0057, "step": 27 }, { "clip_ratio/high_max": 0.002439399620925542, "clip_ratio/high_mean": 0.0010170852947339881, "clip_ratio/low_mean": 0.0007562009886896703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001773286254319828, "epoch": 0.06532516768737241, "grad_norm": 0.12486453354358673, "learning_rate": 1e-06, "loss": 0.0055, "step": 28 }, { "clip_ratio/high_max": 0.0021370626272982918, "clip_ratio/high_mean": 0.0008809797818685183, "clip_ratio/low_mean": 0.0005185944191907765, "clip_ratio/low_min": 3.953835221182089e-05, "clip_ratio/region_mean": 0.0013995742046972737, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3560.0, "completions/mean_length": 678.6060791015625, "completions/mean_terminated_length": 604.5689697265625, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.06765820939049286, "grad_norm": 0.11682160198688507, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 4593306.0, "reward": 0.5457589626312256, "reward_std": 0.21774594485759735, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 29 }, { "clip_ratio/high_max": 0.0021843241120222956, "clip_ratio/high_mean": 0.0009300625351897907, "clip_ratio/low_mean": 0.0006210761221154826, "clip_ratio/low_min": 4.4221876123629045e-05, "clip_ratio/region_mean": 0.0015511386664002202, "epoch": 0.0699912510936133, "grad_norm": 0.1146954670548439, "learning_rate": 1e-06, "loss": 0.0045, "step": 30 }, { "clip_ratio/high_max": 0.0024472579316352494, "clip_ratio/high_mean": 0.0010522642951400485, "clip_ratio/low_mean": 0.0005727457664761459, "clip_ratio/low_min": 3.6030963201483246e-05, "clip_ratio/region_mean": 0.001625010023417417, "epoch": 0.07232429279673375, "grad_norm": 0.1153184175491333, "learning_rate": 1e-06, "loss": 0.0044, "step": 31 }, { "clip_ratio/high_max": 0.0019284382869955152, "clip_ratio/high_mean": 0.000992046516330447, "clip_ratio/low_mean": 0.0006042511531632044, "clip_ratio/low_min": 2.522551039874088e-05, "clip_ratio/region_mean": 0.0015962977122399025, "epoch": 0.07465733449985419, "grad_norm": 0.1235361248254776, "learning_rate": 1e-06, "loss": 0.0044, "step": 32 }, { "clip_ratio/high_max": 0.0021176241425564513, "clip_ratio/high_mean": 0.0009372366621391848, "clip_ratio/low_mean": 0.0005595719358097995, "clip_ratio/low_min": 6.470445987361018e-05, "clip_ratio/region_mean": 0.0014968086179578677, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 700.318115234375, "completions/mean_terminated_length": 594.8135986328125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.07699037620297462, "grad_norm": 0.12378786504268646, "learning_rate": 1e-06, "loss": -0.0102, "num_tokens": 5199175.0, "reward": 0.5412946939468384, "reward_std": 0.21545571088790894, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 33 }, { "clip_ratio/high_max": 0.0022196359786903486, "clip_ratio/high_mean": 0.0009039962114911759, "clip_ratio/low_mean": 0.000660695368424058, "clip_ratio/low_min": 3.265412669861689e-05, "clip_ratio/region_mean": 0.0015646915635443293, "epoch": 0.07932341790609507, "grad_norm": 0.125546395778656, "learning_rate": 1e-06, "loss": -0.0103, "step": 34 }, { "clip_ratio/high_max": 0.0023568391989101656, "clip_ratio/high_mean": 0.0009934375430020737, "clip_ratio/low_mean": 0.0006520127171825152, "clip_ratio/low_min": 4.448789968591882e-05, "clip_ratio/region_mean": 0.0016454502547276206, "epoch": 0.08165645960921551, "grad_norm": 0.11966123431921005, "learning_rate": 1e-06, "loss": -0.0104, "step": 35 }, { "clip_ratio/high_max": 0.002292566980031552, "clip_ratio/high_mean": 0.0009935368743754225, "clip_ratio/low_mean": 0.0006416244887077482, "clip_ratio/low_min": 7.534917676821351e-05, "clip_ratio/region_mean": 0.0016351613521692343, "epoch": 0.08398950131233596, "grad_norm": 0.12310697883367538, "learning_rate": 1e-06, "loss": -0.0103, "step": 36 }, { "clip_ratio/high_max": 0.00229012560157571, "clip_ratio/high_mean": 0.0009375477602588944, "clip_ratio/low_mean": 0.0007024908227322157, "clip_ratio/low_min": 2.9890166842960753e-05, "clip_ratio/region_mean": 0.0016400385648012161, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 629.1998291015625, "completions/mean_terminated_length": 578.15966796875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.0863225430154564, "grad_norm": 0.12611764669418335, "learning_rate": 1e-06, "loss": 0.0144, "num_tokens": 5804930.0, "reward": 0.5290178656578064, "reward_std": 0.23063203692436218, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 37 }, { "clip_ratio/high_max": 0.0026505103378440253, "clip_ratio/high_mean": 0.000995517555566039, "clip_ratio/low_mean": 0.000749046572309453, "clip_ratio/low_min": 5.4068157623987645e-05, "clip_ratio/region_mean": 0.0017445641424274072, "epoch": 0.08865558471857685, "grad_norm": 0.12353263795375824, "learning_rate": 1e-06, "loss": 0.0143, "step": 38 }, { "clip_ratio/high_max": 0.002375702024437487, "clip_ratio/high_mean": 0.0009863118175417185, "clip_ratio/low_mean": 0.0008022430338314734, "clip_ratio/low_min": 3.6729577914229594e-05, "clip_ratio/region_mean": 0.0017885548804770224, "epoch": 0.09098862642169729, "grad_norm": 0.12338662147521973, "learning_rate": 1e-06, "loss": 0.0143, "step": 39 }, { "clip_ratio/high_max": 0.0024909446510719135, "clip_ratio/high_mean": 0.0010256626637783484, "clip_ratio/low_mean": 0.000834449481772026, "clip_ratio/low_min": 6.956895595067181e-05, "clip_ratio/region_mean": 0.001860112141002901, "epoch": 0.09332166812481774, "grad_norm": 0.12385766208171844, "learning_rate": 1e-06, "loss": 0.0142, "step": 40 }, { "clip_ratio/high_max": 0.001583784669492161, "clip_ratio/high_mean": 0.0005768877708760556, "clip_ratio/low_mean": 0.0005286252780933864, "clip_ratio/low_min": 1.4124293556960765e-05, "clip_ratio/region_mean": 0.0011055130635213573, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2063.0, "completions/mean_length": 667.3471069335938, "completions/mean_terminated_length": 605.0079345703125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.09565470982793818, "grad_norm": 0.1104072779417038, "learning_rate": 1e-06, "loss": 0.0185, "num_tokens": 6421681.0, "reward": 0.5066964626312256, "reward_std": 0.1720547378063202, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 41 }, { "clip_ratio/high_max": 0.0020037883587065153, "clip_ratio/high_mean": 0.0006965057691559196, "clip_ratio/low_mean": 0.0006347013313643401, "clip_ratio/low_min": 2.8372424822009634e-05, "clip_ratio/region_mean": 0.001331207105977228, "epoch": 0.09798775153105861, "grad_norm": 0.11040699481964111, "learning_rate": 1e-06, "loss": 0.0184, "step": 42 }, { "clip_ratio/high_max": 0.0018374437386228237, "clip_ratio/high_mean": 0.0006500711151602445, "clip_ratio/low_mean": 0.0005802347404824104, "clip_ratio/low_min": 1.0425354048493318e-05, "clip_ratio/region_mean": 0.001230305842909729, "epoch": 0.10032079323417906, "grad_norm": 0.10948064178228378, "learning_rate": 1e-06, "loss": 0.0184, "step": 43 }, { "clip_ratio/high_max": 0.0018857809591281693, "clip_ratio/high_mean": 0.0006436912299250253, "clip_ratio/low_mean": 0.000669299080982455, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013129903072695015, "epoch": 0.1026538349372995, "grad_norm": 0.1097576767206192, "learning_rate": 1e-06, "loss": 0.0183, "step": 44 }, { "clip_ratio/high_max": 0.001930827418618719, "clip_ratio/high_mean": 0.0006981309613820486, "clip_ratio/low_mean": 0.0005967152856101166, "clip_ratio/low_min": 2.5625256967032328e-05, "clip_ratio/region_mean": 0.001294846246310044, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 659.1183471679688, "completions/mean_terminated_length": 552.333740234375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.10498687664041995, "grad_norm": 0.11341482400894165, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 7004139.0, "reward": 0.5401785969734192, "reward_std": 0.15878842771053314, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 45 }, { "clip_ratio/high_max": 0.002184344320994569, "clip_ratio/high_mean": 0.000801586273155408, "clip_ratio/low_mean": 0.0006106818127591396, "clip_ratio/low_min": 1.4501159967039712e-05, "clip_ratio/region_mean": 0.0014122680768196005, "epoch": 0.10731991834354039, "grad_norm": 0.1139429360628128, "learning_rate": 1e-06, "loss": 0.0021, "step": 46 }, { "clip_ratio/high_max": 0.0022679470203001983, "clip_ratio/high_mean": 0.000745011353501468, "clip_ratio/low_mean": 0.0006523218762595206, "clip_ratio/low_min": 1.2773350135830697e-05, "clip_ratio/region_mean": 0.0013973332061141264, "epoch": 0.10965296004666084, "grad_norm": 0.11152958869934082, "learning_rate": 1e-06, "loss": 0.002, "step": 47 }, { "clip_ratio/high_max": 0.0024709537283342797, "clip_ratio/high_mean": 0.0008462621262879111, "clip_ratio/low_mean": 0.0006355218320095446, "clip_ratio/low_min": 3.843788363155909e-05, "clip_ratio/region_mean": 0.001481783951021498, "epoch": 0.11198600174978128, "grad_norm": 0.11159171164035797, "learning_rate": 1e-06, "loss": 0.002, "step": 48 }, { "clip_ratio/high_max": 0.002606435209600022, "clip_ratio/high_mean": 0.0010781428645714186, "clip_ratio/low_mean": 0.000551244026837594, "clip_ratio/low_min": 8.372309730475536e-05, "clip_ratio/region_mean": 0.0016293868975481018, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 655.7846069335938, "completions/mean_terminated_length": 597.211181640625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.11431904345290173, "grad_norm": 0.13207431137561798, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 7618274.0, "reward": 0.5558035969734192, "reward_std": 0.22808194160461426, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 49 }, { "clip_ratio/high_max": 0.002573193338321289, "clip_ratio/high_mean": 0.0010621982528391527, "clip_ratio/low_mean": 0.000621154951659264, "clip_ratio/low_min": 4.7855231059656944e-05, "clip_ratio/region_mean": 0.0016833531844895333, "epoch": 0.11665208515602217, "grad_norm": 0.13091568648815155, "learning_rate": 1e-06, "loss": -0.0009, "step": 50 }, { "clip_ratio/high_max": 0.002612927622976713, "clip_ratio/high_mean": 0.0010625724717101548, "clip_ratio/low_mean": 0.0007023778762231814, "clip_ratio/low_min": 5.2263617362768855e-05, "clip_ratio/region_mean": 0.0017649503679422196, "epoch": 0.1189851268591426, "grad_norm": 0.13183654844760895, "learning_rate": 1e-06, "loss": -0.0009, "step": 51 }, { "clip_ratio/high_max": 0.0026154524603043683, "clip_ratio/high_mean": 0.001107633943320252, "clip_ratio/low_mean": 0.00068232952071412, "clip_ratio/low_min": 3.851271958410507e-05, "clip_ratio/region_mean": 0.0017899634622153826, "epoch": 0.12131816856226305, "grad_norm": 0.12829582393169403, "learning_rate": 1e-06, "loss": -0.001, "step": 52 }, { "clip_ratio/high_max": 0.002519420573662501, "clip_ratio/high_mean": 0.0010384653105575126, "clip_ratio/low_mean": 0.0005523251293197973, "clip_ratio/low_min": 3.0062635232752655e-05, "clip_ratio/region_mean": 0.0015907904598861933, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3275.0, "completions/mean_length": 595.1596069335938, "completions/mean_terminated_length": 535.553955078125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.12365121026538349, "grad_norm": 0.12745356559753418, "learning_rate": 1e-06, "loss": -0.0042, "num_tokens": 8186209.0, "reward": 0.5691964626312256, "reward_std": 0.22529542446136475, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 53 }, { "clip_ratio/high_max": 0.002658939854882192, "clip_ratio/high_mean": 0.00113034357491415, "clip_ratio/low_mean": 0.0006279497829382308, "clip_ratio/low_min": 6.613486675632885e-05, "clip_ratio/region_mean": 0.0017582933578523807, "epoch": 0.12598425196850394, "grad_norm": 0.12348710000514984, "learning_rate": 1e-06, "loss": -0.0042, "step": 54 }, { "clip_ratio/high_max": 0.0025012689584400505, "clip_ratio/high_mean": 0.001088208387955092, "clip_ratio/low_mean": 0.0006285139834290021, "clip_ratio/low_min": 3.1737179597257636e-05, "clip_ratio/region_mean": 0.0017167224104923662, "epoch": 0.1283172936716244, "grad_norm": 0.12301290035247803, "learning_rate": 1e-06, "loss": -0.0043, "step": 55 }, { "clip_ratio/high_max": 0.002401656427537091, "clip_ratio/high_mean": 0.001099151726521086, "clip_ratio/low_mean": 0.0007453204689227277, "clip_ratio/low_min": 5.498792961589061e-05, "clip_ratio/region_mean": 0.00184447223728057, "epoch": 0.13065033537474482, "grad_norm": 0.12256094813346863, "learning_rate": 1e-06, "loss": -0.0044, "step": 56 }, { "clip_ratio/high_max": 0.0018831756169674918, "clip_ratio/high_mean": 0.0007435029820044292, "clip_ratio/low_mean": 0.0005309441312419949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012744471132464241, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3656.0, "completions/mean_length": 642.0803833007812, "completions/mean_terminated_length": 603.0971069335938, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.13298337707786526, "grad_norm": 0.1111573651432991, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 8814017.0, "reward": 0.5848214626312256, "reward_std": 0.17400752007961273, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 57 }, { "clip_ratio/high_max": 0.0020343345895526, "clip_ratio/high_mean": 0.0007646062240382889, "clip_ratio/low_mean": 0.0006124447772890562, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013770509904134087, "epoch": 0.13531641878098571, "grad_norm": 0.1069447249174118, "learning_rate": 1e-06, "loss": 0.0095, "step": 58 }, { "clip_ratio/high_max": 0.001814778457628563, "clip_ratio/high_mean": 0.0007152991438488243, "clip_ratio/low_mean": 0.000650829207188508, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013661283046531025, "epoch": 0.13764946048410615, "grad_norm": 0.10686612129211426, "learning_rate": 1e-06, "loss": 0.0095, "step": 59 }, { "clip_ratio/high_max": 0.002180582654546015, "clip_ratio/high_mean": 0.0008631766177131794, "clip_ratio/low_mean": 0.0006756882612535264, "clip_ratio/low_min": 1.304801662627142e-05, "clip_ratio/region_mean": 0.001538864893518621, "epoch": 0.1399825021872266, "grad_norm": 0.10535623878240585, "learning_rate": 1e-06, "loss": 0.0094, "step": 60 }, { "clip_ratio/high_max": 0.002108367465552874, "clip_ratio/high_mean": 0.0007626612023159396, "clip_ratio/low_mean": 0.0004933809123031097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012560421346279327, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 694.1864013671875, "completions/mean_terminated_length": 592.5230102539062, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.14231554389034703, "grad_norm": 0.11304501444101334, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 9420976.0, "reward": 0.5569196939468384, "reward_std": 0.1749839335680008, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 61 }, { "clip_ratio/high_max": 0.0021938198697171174, "clip_ratio/high_mean": 0.0008237038055085577, "clip_ratio/low_mean": 0.000559093690753798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013827974908053875, "epoch": 0.1446485855934675, "grad_norm": 0.10986457765102386, "learning_rate": 1e-06, "loss": 0.0074, "step": 62 }, { "clip_ratio/high_max": 0.002056484743661713, "clip_ratio/high_mean": 0.0008661098709126236, "clip_ratio/low_mean": 0.0005379781496230862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014040879941603635, "epoch": 0.14698162729658792, "grad_norm": 0.11047877371311188, "learning_rate": 1e-06, "loss": 0.0073, "step": 63 }, { "clip_ratio/high_max": 0.002259606761072064, "clip_ratio/high_mean": 0.0008478759955323767, "clip_ratio/low_mean": 0.0005690079124178737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014168839370540809, "epoch": 0.14931466899970838, "grad_norm": 0.10816746205091476, "learning_rate": 1e-06, "loss": 0.0072, "step": 64 }, { "clip_ratio/high_max": 0.002090176487399731, "clip_ratio/high_mean": 0.0008006530915736221, "clip_ratio/low_mean": 0.000709360796463443, "clip_ratio/low_min": 4.4256788896746e-05, "clip_ratio/region_mean": 0.001510013888037065, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2538.0, "completions/mean_length": 605.8460083007812, "completions/mean_terminated_length": 578.364501953125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.15164771070282881, "grad_norm": 0.12577134370803833, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 10020766.0, "reward": 0.5837053656578064, "reward_std": 0.21857714653015137, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 65 }, { "clip_ratio/high_max": 0.002333878477656981, "clip_ratio/high_mean": 0.0009288478941016365, "clip_ratio/low_mean": 0.0007335979662457248, "clip_ratio/low_min": 2.1956602722639218e-05, "clip_ratio/region_mean": 0.0016624458621663507, "epoch": 0.15398075240594924, "grad_norm": 0.12191906571388245, "learning_rate": 1e-06, "loss": 0.0022, "step": 66 }, { "clip_ratio/high_max": 0.002172569580579875, "clip_ratio/high_mean": 0.0008620889511803398, "clip_ratio/low_mean": 0.0008169914053723915, "clip_ratio/low_min": 3.694098995765671e-05, "clip_ratio/region_mean": 0.00167908036019071, "epoch": 0.1563137941090697, "grad_norm": 0.12169674783945084, "learning_rate": 1e-06, "loss": 0.0021, "step": 67 }, { "clip_ratio/high_max": 0.0023263591720024124, "clip_ratio/high_mean": 0.0009342802095488878, "clip_ratio/low_mean": 0.0009031826884893235, "clip_ratio/low_min": 5.15037982040667e-05, "clip_ratio/region_mean": 0.0018374628780293278, "epoch": 0.15864683581219013, "grad_norm": 0.12440544366836548, "learning_rate": 1e-06, "loss": 0.002, "step": 68 }, { "clip_ratio/high_max": 0.0018946796481031924, "clip_ratio/high_mean": 0.0008617473213234916, "clip_ratio/low_mean": 0.0006712405856887926, "clip_ratio/low_min": 2.0611231775546912e-05, "clip_ratio/region_mean": 0.0015329879242926836, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3917.0, "completions/mean_length": 706.7098388671875, "completions/mean_terminated_length": 605.4207153320312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.1609798775153106, "grad_norm": 0.13245128095149994, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 10628714.0, "reward": 0.559151828289032, "reward_std": 0.23240120708942413, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 69 }, { "clip_ratio/high_max": 0.0024722638409002684, "clip_ratio/high_mean": 0.0009684698052296881, "clip_ratio/low_mean": 0.0007125175779947313, "clip_ratio/low_min": 3.960115100198891e-05, "clip_ratio/region_mean": 0.0016809873850434087, "epoch": 0.16331291921843102, "grad_norm": 0.13081303238868713, "learning_rate": 1e-06, "loss": 0.0076, "step": 70 }, { "clip_ratio/high_max": 0.0022747882321709767, "clip_ratio/high_mean": 0.0009260399037884781, "clip_ratio/low_mean": 0.000759382968681166, "clip_ratio/low_min": 4.311801512812963e-05, "clip_ratio/region_mean": 0.0016854228488227818, "epoch": 0.16564596092155148, "grad_norm": 0.1290624439716339, "learning_rate": 1e-06, "loss": 0.0076, "step": 71 }, { "clip_ratio/high_max": 0.0023498402879340574, "clip_ratio/high_mean": 0.0009991134465963114, "clip_ratio/low_mean": 0.0007842375252948841, "clip_ratio/low_min": 4.8880713620746974e-05, "clip_ratio/region_mean": 0.0017833509882621001, "epoch": 0.1679790026246719, "grad_norm": 0.1262524425983429, "learning_rate": 1e-06, "loss": 0.0075, "step": 72 }, { "clip_ratio/high_max": 0.0027180942706763744, "clip_ratio/high_mean": 0.0011511004449857865, "clip_ratio/low_mean": 0.0006964103449718095, "clip_ratio/low_min": 3.421556357352529e-05, "clip_ratio/region_mean": 0.0018475107644917443, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3315.0, "completions/mean_length": 680.4319458007812, "completions/mean_terminated_length": 578.3574829101562, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.17031204432779237, "grad_norm": 0.13039037585258484, "learning_rate": 1e-06, "loss": -0.0199, "num_tokens": 11226341.0, "reward": 0.660714328289032, "reward_std": 0.23322537541389465, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 73 }, { "clip_ratio/high_max": 0.0024872794601833448, "clip_ratio/high_mean": 0.0011051116780436132, "clip_ratio/low_mean": 0.0007465089693141636, "clip_ratio/low_min": 6.628324354096549e-05, "clip_ratio/region_mean": 0.0018516206691856496, "epoch": 0.1726450860309128, "grad_norm": 0.12996259331703186, "learning_rate": 1e-06, "loss": -0.0199, "step": 74 }, { "clip_ratio/high_max": 0.0028027194930473343, "clip_ratio/high_mean": 0.0012348731033853255, "clip_ratio/low_mean": 0.0008350134203283233, "clip_ratio/low_min": 6.279769513639621e-05, "clip_ratio/region_mean": 0.0020698865337180905, "epoch": 0.17497812773403323, "grad_norm": 0.1261185258626938, "learning_rate": 1e-06, "loss": -0.0201, "step": 75 }, { "clip_ratio/high_max": 0.0032098262090585195, "clip_ratio/high_mean": 0.0013133431639289483, "clip_ratio/low_mean": 0.0007700925461904262, "clip_ratio/low_min": 4.5335311369854026e-05, "clip_ratio/region_mean": 0.002083435691019986, "epoch": 0.1773111694371537, "grad_norm": 0.1239931732416153, "learning_rate": 1e-06, "loss": -0.0201, "step": 76 }, { "clip_ratio/high_max": 0.0019031432893825695, "clip_ratio/high_mean": 0.0009410108505107928, "clip_ratio/low_mean": 0.0006787779257138027, "clip_ratio/low_min": 5.3184265198069625e-05, "clip_ratio/region_mean": 0.0016197887598536909, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 593.59375, "completions/mean_terminated_length": 562.04052734375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.17964421114027412, "grad_norm": 0.1269347369670868, "learning_rate": 1e-06, "loss": 0.0066, "num_tokens": 11816601.0, "reward": 0.6495535969734192, "reward_std": 0.21132300794124603, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 77 }, { "clip_ratio/high_max": 0.0020921051764162257, "clip_ratio/high_mean": 0.0009143478564510588, "clip_ratio/low_mean": 0.0007781102540320717, "clip_ratio/low_min": 4.562021877063671e-05, "clip_ratio/region_mean": 0.0016924581068451516, "epoch": 0.18197725284339458, "grad_norm": 0.1262543946504593, "learning_rate": 1e-06, "loss": 0.0064, "step": 78 }, { "clip_ratio/high_max": 0.00232102838344872, "clip_ratio/high_mean": 0.0010060116510430817, "clip_ratio/low_mean": 0.0008259733658633195, "clip_ratio/low_min": 9.44464636631892e-05, "clip_ratio/region_mean": 0.00183198502054438, "epoch": 0.184310294546515, "grad_norm": 0.12525974214076996, "learning_rate": 1e-06, "loss": 0.0064, "step": 79 }, { "clip_ratio/high_max": 0.002366886088566389, "clip_ratio/high_mean": 0.0010620286157063674, "clip_ratio/low_mean": 0.0008741377005208051, "clip_ratio/low_min": 7.721427664364455e-05, "clip_ratio/region_mean": 0.0019361662562005222, "epoch": 0.18664333624963547, "grad_norm": 0.1252414882183075, "learning_rate": 1e-06, "loss": 0.0063, "step": 80 }, { "clip_ratio/high_max": 0.0018435443125781603, "clip_ratio/high_mean": 0.0007656542757104035, "clip_ratio/low_mean": 0.0005714547769457567, "clip_ratio/low_min": 1.332338524662191e-05, "clip_ratio/region_mean": 0.0013371090353757609, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3419.0, "completions/mean_length": 646.0457763671875, "completions/mean_terminated_length": 567.2796630859375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.1889763779527559, "grad_norm": 0.1250150203704834, "learning_rate": 1e-06, "loss": 0.0102, "num_tokens": 12395730.0, "reward": 0.598214328289032, "reward_std": 0.18430186808109283, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 81 }, { "clip_ratio/high_max": 0.0019822753747575916, "clip_ratio/high_mean": 0.0008148421766236424, "clip_ratio/low_mean": 0.0005596559967671055, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013744982024945784, "epoch": 0.19130941965587636, "grad_norm": 0.12403535842895508, "learning_rate": 1e-06, "loss": 0.0101, "step": 82 }, { "clip_ratio/high_max": 0.001799621146346908, "clip_ratio/high_mean": 0.0008243617266998626, "clip_ratio/low_mean": 0.0006454474469137494, "clip_ratio/low_min": 5.0011256462312303e-05, "clip_ratio/region_mean": 0.001469809198169969, "epoch": 0.1936424613589968, "grad_norm": 0.12789423763751984, "learning_rate": 1e-06, "loss": 0.0101, "step": 83 }, { "clip_ratio/high_max": 0.0021637038080370985, "clip_ratio/high_mean": 0.0009658185390435392, "clip_ratio/low_mean": 0.000647538096018252, "clip_ratio/low_min": 2.5005628231156152e-05, "clip_ratio/region_mean": 0.0016133566678036004, "epoch": 0.19597550306211722, "grad_norm": 0.11844993382692337, "learning_rate": 1e-06, "loss": 0.0099, "step": 84 }, { "clip_ratio/high_max": 0.0016195920543395914, "clip_ratio/high_mean": 0.0007305502513190731, "clip_ratio/low_mean": 0.00042413621758896625, "clip_ratio/low_min": 2.5902445486281067e-05, "clip_ratio/region_mean": 0.0011546864770934917, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 694.1875610351562, "completions/mean_terminated_length": 628.3958740234375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.19830854476523768, "grad_norm": 0.10732998698949814, "learning_rate": 1e-06, "loss": 0.0246, "num_tokens": 13043522.0, "reward": 0.5290178656578064, "reward_std": 0.1879132241010666, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 85 }, { "clip_ratio/high_max": 0.001977817406441318, "clip_ratio/high_mean": 0.0008252691604866413, "clip_ratio/low_mean": 0.0005512036532309139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013764728064415976, "epoch": 0.2006415864683581, "grad_norm": 0.10541458427906036, "learning_rate": 1e-06, "loss": 0.0245, "step": 86 }, { "clip_ratio/high_max": 0.0018707476629060693, "clip_ratio/high_mean": 0.0008347092680196511, "clip_ratio/low_mean": 0.0005563870163314277, "clip_ratio/low_min": 2.986857907671947e-05, "clip_ratio/region_mean": 0.001391096335282782, "epoch": 0.20297462817147857, "grad_norm": 0.10650135576725006, "learning_rate": 1e-06, "loss": 0.0245, "step": 87 }, { "clip_ratio/high_max": 0.0019968881679233164, "clip_ratio/high_mean": 0.000803314738732297, "clip_ratio/low_mean": 0.0005893544366699643, "clip_ratio/low_min": 1.4934289538359735e-05, "clip_ratio/region_mean": 0.0013926691899541765, "epoch": 0.205307669874599, "grad_norm": 0.10585729777812958, "learning_rate": 1e-06, "loss": 0.0244, "step": 88 }, { "clip_ratio/high_max": 0.0017711450818751473, "clip_ratio/high_mean": 0.0007248113288369495, "clip_ratio/low_mean": 0.0005172562141524395, "clip_ratio/low_min": 1.4178765923134051e-05, "clip_ratio/region_mean": 0.0012420675193425268, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3155.0, "completions/mean_length": 707.794677734375, "completions/mean_terminated_length": 602.5224609375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.20764071157771946, "grad_norm": 0.12869536876678467, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 13651450.0, "reward": 0.637276828289032, "reward_std": 0.18129737675189972, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 89 }, { "clip_ratio/high_max": 0.0019154321453243028, "clip_ratio/high_mean": 0.0007747264771751361, "clip_ratio/low_mean": 0.0005645960791298421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001339322541753063, "epoch": 0.2099737532808399, "grad_norm": 0.12464048713445663, "learning_rate": 1e-06, "loss": -0.0065, "step": 90 }, { "clip_ratio/high_max": 0.0019451008993200958, "clip_ratio/high_mean": 0.0008710344573046314, "clip_ratio/low_mean": 0.0006486630554718431, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015196975255094003, "epoch": 0.21230679498396035, "grad_norm": 0.12116258591413498, "learning_rate": 1e-06, "loss": -0.0066, "step": 91 }, { "clip_ratio/high_max": 0.002008242765441537, "clip_ratio/high_mean": 0.0008125443891913164, "clip_ratio/low_mean": 0.0007152098842198029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001527754280687077, "epoch": 0.21463983668708078, "grad_norm": 0.11910553276538849, "learning_rate": 1e-06, "loss": -0.0067, "step": 92 }, { "clip_ratio/high_max": 0.0018941184753202833, "clip_ratio/high_mean": 0.0007535947170254076, "clip_ratio/low_mean": 0.0004500252680372796, "clip_ratio/low_min": 1.5907355191302486e-05, "clip_ratio/region_mean": 0.001203619995067129, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2767.0, "completions/mean_length": 705.9219360351562, "completions/mean_terminated_length": 628.5228271484375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.2169728783902012, "grad_norm": 0.1125323548913002, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 14286332.0, "reward": 0.5535714626312256, "reward_std": 0.19163475930690765, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 93 }, { "clip_ratio/high_max": 0.0021611167867376935, "clip_ratio/high_mean": 0.0007815511544322362, "clip_ratio/low_mean": 0.0005014456273784162, "clip_ratio/low_min": 1.736834747134708e-05, "clip_ratio/region_mean": 0.0012829967672587372, "epoch": 0.21930592009332167, "grad_norm": 0.11015673726797104, "learning_rate": 1e-06, "loss": -0.0053, "step": 94 }, { "clip_ratio/high_max": 0.0020602601434802637, "clip_ratio/high_mean": 0.0008277097422251245, "clip_ratio/low_mean": 0.0005118875869811745, "clip_ratio/low_min": 2.3599455744260922e-05, "clip_ratio/region_mean": 0.0013395973219303414, "epoch": 0.2216389617964421, "grad_norm": 0.11136667430400848, "learning_rate": 1e-06, "loss": -0.0054, "step": 95 }, { "clip_ratio/high_max": 0.002315436089702416, "clip_ratio/high_mean": 0.0008609901287854882, "clip_ratio/low_mean": 0.0005434525282907998, "clip_ratio/low_min": 1.5907355191302486e-05, "clip_ratio/region_mean": 0.0014044426607142668, "epoch": 0.22397200349956256, "grad_norm": 0.10752148181200027, "learning_rate": 1e-06, "loss": -0.0054, "step": 96 }, { "clip_ratio/high_max": 0.0016625657699478325, "clip_ratio/high_mean": 0.0006201890655574971, "clip_ratio/low_mean": 0.000499386642331956, "clip_ratio/low_min": 1.2033115126541816e-05, "clip_ratio/region_mean": 0.0011195757197128842, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 797.1551513671875, "completions/mean_terminated_length": 651.052490234375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.226305045202683, "grad_norm": 0.10692703723907471, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 14930167.0, "reward": 0.5457589626312256, "reward_std": 0.17107722163200378, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 97 }, { "clip_ratio/high_max": 0.0018763612533803098, "clip_ratio/high_mean": 0.0007112914427125361, "clip_ratio/low_mean": 0.0005139210170455044, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00122521246157703, "epoch": 0.22863808690580345, "grad_norm": 0.10555337369441986, "learning_rate": 1e-06, "loss": -0.0092, "step": 98 }, { "clip_ratio/high_max": 0.0016491648311784957, "clip_ratio/high_mean": 0.0006668064925179351, "clip_ratio/low_mean": 0.0006113593735790346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001278165840631118, "epoch": 0.23097112860892388, "grad_norm": 0.1043100655078888, "learning_rate": 1e-06, "loss": -0.0093, "step": 99 }, { "clip_ratio/high_max": 0.00191963902398129, "clip_ratio/high_mean": 0.0007225982008094434, "clip_ratio/low_mean": 0.0005822008433824521, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001304799039644422, "epoch": 0.23330417031204434, "grad_norm": 0.10567686706781387, "learning_rate": 1e-06, "loss": -0.0093, "step": 100 }, { "clip_ratio/high_max": 0.0016583305387030123, "clip_ratio/high_mean": 0.0007518018992414, "clip_ratio/low_mean": 0.0007701156791881658, "clip_ratio/low_min": 5.871514713362558e-05, "clip_ratio/region_mean": 0.0015219175402307883, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3472.0, "completions/mean_length": 699.763427734375, "completions/mean_terminated_length": 606.2889404296875, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.23563721201516477, "grad_norm": 0.13025298714637756, "learning_rate": 1e-06, "loss": 0.0202, "num_tokens": 15546323.0, "reward": 0.5245535969734192, "reward_std": 0.21591150760650635, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 101 }, { "clip_ratio/high_max": 0.002065653068711981, "clip_ratio/high_mean": 0.0008601816971349763, "clip_ratio/low_mean": 0.0007794047523930203, "clip_ratio/low_min": 7.620711767231114e-05, "clip_ratio/region_mean": 0.0016395864731748588, "epoch": 0.2379702537182852, "grad_norm": 0.12589409947395325, "learning_rate": 1e-06, "loss": 0.0201, "step": 102 }, { "clip_ratio/high_max": 0.0020022314784000628, "clip_ratio/high_mean": 0.0008255693137471098, "clip_ratio/low_mean": 0.0007917809125501662, "clip_ratio/low_min": 7.215025198092917e-05, "clip_ratio/region_mean": 0.0016173501862795092, "epoch": 0.24030329542140566, "grad_norm": 0.1242484375834465, "learning_rate": 1e-06, "loss": 0.02, "step": 103 }, { "clip_ratio/high_max": 0.0019035753612115514, "clip_ratio/high_mean": 0.0009137180004472611, "clip_ratio/low_mean": 0.0008719772231415845, "clip_ratio/low_min": 0.00010575910619081696, "clip_ratio/region_mean": 0.0017856952326837927, "epoch": 0.2426363371245261, "grad_norm": 0.1210545226931572, "learning_rate": 1e-06, "loss": 0.0199, "step": 104 }, { "clip_ratio/high_max": 0.0022454316713265143, "clip_ratio/high_mean": 0.000870076099090511, "clip_ratio/low_mean": 0.0005405405645433348, "clip_ratio/low_min": 1.1241007086937316e-05, "clip_ratio/region_mean": 0.0014106166454439517, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3474.0, "completions/mean_length": 702.9375610351562, "completions/mean_terminated_length": 593.48388671875, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 0.24496937882764655, "grad_norm": 0.13943475484848022, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 16149587.0, "reward": 0.582589328289032, "reward_std": 0.20993182063102722, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 105 }, { "clip_ratio/high_max": 0.0022179896768648177, "clip_ratio/high_mean": 0.0009223711076629115, "clip_ratio/low_mean": 0.0006832732597104041, "clip_ratio/low_min": 7.951450606924482e-05, "clip_ratio/region_mean": 0.0016056443710112944, "epoch": 0.24730242053076698, "grad_norm": 0.1324169635772705, "learning_rate": 1e-06, "loss": -0.0113, "step": 106 }, { "clip_ratio/high_max": 0.0023009943761280738, "clip_ratio/high_mean": 0.00090966447169194, "clip_ratio/low_mean": 0.0006601435015909374, "clip_ratio/low_min": 4.010021802969277e-05, "clip_ratio/region_mean": 0.001569807980558835, "epoch": 0.24963546223388744, "grad_norm": 0.13063472509384155, "learning_rate": 1e-06, "loss": -0.0114, "step": 107 }, { "clip_ratio/high_max": 0.0026666615303838626, "clip_ratio/high_mean": 0.0010358955296396744, "clip_ratio/low_mean": 0.0007489463259844342, "clip_ratio/low_min": 3.096954242209904e-05, "clip_ratio/region_mean": 0.0017848418283392675, "epoch": 0.25196850393700787, "grad_norm": 0.12885810434818268, "learning_rate": 1e-06, "loss": -0.0115, "step": 108 }, { "clip_ratio/high_max": 0.0020536949050438125, "clip_ratio/high_mean": 0.0008293388964375481, "clip_ratio/low_mean": 0.0006642993957939325, "clip_ratio/low_min": 5.731327473768033e-05, "clip_ratio/region_mean": 0.00149363829405047, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 657.0457763671875, "completions/mean_terminated_length": 598.4937744140625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.2543015456401283, "grad_norm": 0.13001294434070587, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 16766852.0, "reward": 0.5714285969734192, "reward_std": 0.18979185819625854, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 109 }, { "clip_ratio/high_max": 0.0018642976283445023, "clip_ratio/high_mean": 0.000758285681513371, "clip_ratio/low_mean": 0.0006911922246217728, "clip_ratio/low_min": 6.702373775624437e-05, "clip_ratio/region_mean": 0.0014494779243250377, "epoch": 0.2566345873432488, "grad_norm": 0.1258811205625534, "learning_rate": 1e-06, "loss": 0.0057, "step": 110 }, { "clip_ratio/high_max": 0.002164812249247916, "clip_ratio/high_mean": 0.0008751272252993658, "clip_ratio/low_mean": 0.0007665609100513393, "clip_ratio/low_min": 5.334565867087804e-05, "clip_ratio/region_mean": 0.001641688148083631, "epoch": 0.2589676290463692, "grad_norm": 0.12354160845279694, "learning_rate": 1e-06, "loss": 0.0057, "step": 111 }, { "clip_ratio/high_max": 0.0021916895493632182, "clip_ratio/high_mean": 0.0008869685170793673, "clip_ratio/low_mean": 0.0008263004147011088, "clip_ratio/low_min": 6.131671671028016e-05, "clip_ratio/region_mean": 0.0017132688881247304, "epoch": 0.26130067074948965, "grad_norm": 0.12259406596422195, "learning_rate": 1e-06, "loss": 0.0055, "step": 112 }, { "clip_ratio/high_max": 0.001697391977359075, "clip_ratio/high_mean": 0.0006851519665360684, "clip_ratio/low_mean": 0.0006891176126373466, "clip_ratio/low_min": 7.1352720624418e-05, "clip_ratio/region_mean": 0.0013742695373366587, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3655.0, "completions/mean_length": 711.0535888671875, "completions/mean_terminated_length": 617.889892578125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 0.2636337124526101, "grad_norm": 0.12998297810554504, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 17394636.0, "reward": 0.5223214626312256, "reward_std": 0.20102617144584656, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 113 }, { "clip_ratio/high_max": 0.0017535710612719413, "clip_ratio/high_mean": 0.0006981436126807239, "clip_ratio/low_mean": 0.0008300278659589821, "clip_ratio/low_min": 0.0001619695585759473, "clip_ratio/region_mean": 0.0015281714950106107, "epoch": 0.2659667541557305, "grad_norm": 0.12897886335849762, "learning_rate": 1e-06, "loss": -0.0158, "step": 114 }, { "clip_ratio/high_max": 0.0020468809198064264, "clip_ratio/high_mean": 0.0008466970393783413, "clip_ratio/low_mean": 0.0008826452158245957, "clip_ratio/low_min": 0.00015975156929926015, "clip_ratio/region_mean": 0.0017293422424700111, "epoch": 0.268299795858851, "grad_norm": 0.12687930464744568, "learning_rate": 1e-06, "loss": -0.0158, "step": 115 }, { "clip_ratio/high_max": 0.002170760475564748, "clip_ratio/high_mean": 0.0008199298881663708, "clip_ratio/low_mean": 0.000861517079101759, "clip_ratio/low_min": 0.00012403995697241044, "clip_ratio/region_mean": 0.001681446927250363, "epoch": 0.27063283756197143, "grad_norm": 0.12233294546604156, "learning_rate": 1e-06, "loss": -0.0159, "step": 116 }, { "clip_ratio/high_max": 0.0023675355914747342, "clip_ratio/high_mean": 0.0009463731294090394, "clip_ratio/low_mean": 0.0006463080644607544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001592681190231815, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 641.9732666015625, "completions/mean_terminated_length": 575.1717529296875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.27296587926509186, "grad_norm": 0.1370822638273239, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 17995540.0, "reward": 0.5859375, "reward_std": 0.2263127714395523, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 117 }, { "clip_ratio/high_max": 0.0023808866244507954, "clip_ratio/high_mean": 0.0010071201832033694, "clip_ratio/low_mean": 0.0006476468661276158, "clip_ratio/low_min": 1.4744043255632278e-05, "clip_ratio/region_mean": 0.0016547670675208792, "epoch": 0.2752989209682123, "grad_norm": 0.13685859739780426, "learning_rate": 1e-06, "loss": 0.0036, "step": 118 }, { "clip_ratio/high_max": 0.002364542982832063, "clip_ratio/high_mean": 0.0010847648300114088, "clip_ratio/low_mean": 0.0007723511116637383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018571159744169563, "epoch": 0.2776319626713328, "grad_norm": 0.13253997266292572, "learning_rate": 1e-06, "loss": 0.0034, "step": 119 }, { "clip_ratio/high_max": 0.002691040266654454, "clip_ratio/high_mean": 0.0010794019362947438, "clip_ratio/low_mean": 0.0008650881063658744, "clip_ratio/low_min": 4.414796330820536e-05, "clip_ratio/region_mean": 0.0019444899953668937, "epoch": 0.2799650043744532, "grad_norm": 0.13256323337554932, "learning_rate": 1e-06, "loss": 0.0033, "step": 120 }, { "clip_ratio/high_max": 0.0018208038454758935, "clip_ratio/high_mean": 0.0007685111413593404, "clip_ratio/low_mean": 0.0005416209351096768, "clip_ratio/low_min": 5.682390292349737e-05, "clip_ratio/region_mean": 0.0013101320873829536, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 776.3292846679688, "completions/mean_terminated_length": 665.2906494140625, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.28229804607757364, "grad_norm": 0.11596595495939255, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 18657227.0, "reward": 0.5613839626312256, "reward_std": 0.20169946551322937, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 121 }, { "clip_ratio/high_max": 0.0019638911999209085, "clip_ratio/high_mean": 0.0007903282944425882, "clip_ratio/low_mean": 0.0006332460270641604, "clip_ratio/low_min": 4.578135758492863e-05, "clip_ratio/region_mean": 0.0014235743146855384, "epoch": 0.28463108778069407, "grad_norm": 0.11225331574678421, "learning_rate": 1e-06, "loss": -0.003, "step": 122 }, { "clip_ratio/high_max": 0.0020143422298133373, "clip_ratio/high_mean": 0.0007923571538412943, "clip_ratio/low_mean": 0.0006825453601777554, "clip_ratio/low_min": 6.15502985965577e-05, "clip_ratio/region_mean": 0.0014749025249329861, "epoch": 0.2869641294838145, "grad_norm": 0.11058894544839859, "learning_rate": 1e-06, "loss": -0.003, "step": 123 }, { "clip_ratio/high_max": 0.00207445933483541, "clip_ratio/high_mean": 0.0008512227577739395, "clip_ratio/low_mean": 0.0006609621759707807, "clip_ratio/low_min": 5.902328211959684e-05, "clip_ratio/region_mean": 0.0015121849428396672, "epoch": 0.289297171186935, "grad_norm": 0.11066972464323044, "learning_rate": 1e-06, "loss": -0.0031, "step": 124 }, { "clip_ratio/high_max": 0.0021815118452650495, "clip_ratio/high_mean": 0.0008497404996887781, "clip_ratio/low_mean": 0.0004772272868649452, "clip_ratio/low_min": 2.0509113710431848e-05, "clip_ratio/region_mean": 0.001326967823843006, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2720.0, "completions/mean_length": 705.1797485351562, "completions/mean_terminated_length": 595.7984008789062, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.2916302128900554, "grad_norm": 0.13661257922649384, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 19265220.0, "reward": 0.609375, "reward_std": 0.18528781831264496, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 125 }, { "clip_ratio/high_max": 0.0021811188526044134, "clip_ratio/high_mean": 0.0008807081148916041, "clip_ratio/low_mean": 0.0005780017290817341, "clip_ratio/low_min": 2.8875028874608688e-05, "clip_ratio/region_mean": 0.0014587098012270872, "epoch": 0.29396325459317585, "grad_norm": 0.1319681853055954, "learning_rate": 1e-06, "loss": 0.0137, "step": 126 }, { "clip_ratio/high_max": 0.002105122650391422, "clip_ratio/high_mean": 0.0008940856241679285, "clip_ratio/low_mean": 0.0006709501121804351, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015650357236154377, "epoch": 0.2962962962962963, "grad_norm": 0.14205984771251678, "learning_rate": 1e-06, "loss": 0.0136, "step": 127 }, { "clip_ratio/high_max": 0.0024559525918448344, "clip_ratio/high_mean": 0.0009362378386867931, "clip_ratio/low_mean": 0.0007356516161962645, "clip_ratio/low_min": 3.0419239919865504e-05, "clip_ratio/region_mean": 0.0016718894694349729, "epoch": 0.29862933799941677, "grad_norm": 0.13000363111495972, "learning_rate": 1e-06, "loss": 0.0135, "step": 128 }, { "clip_ratio/high_max": 0.0019030739968002308, "clip_ratio/high_mean": 0.0006965993607082055, "clip_ratio/low_mean": 0.0006350407384161372, "clip_ratio/low_min": 7.164487760746852e-05, "clip_ratio/region_mean": 0.0013316400873009115, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3387.0, "completions/mean_length": 629.5814819335938, "completions/mean_terminated_length": 554.4822998046875, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.3009623797025372, "grad_norm": 0.13182252645492554, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 19838037.0, "reward": 0.5691964626312256, "reward_std": 0.16630510985851288, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 129 }, { "clip_ratio/high_max": 0.0019440464311628602, "clip_ratio/high_mean": 0.0007428180806527962, "clip_ratio/low_mean": 0.0006763731698811171, "clip_ratio/low_min": 4.770672876475146e-05, "clip_ratio/region_mean": 0.0014191912378009874, "epoch": 0.30329542140565763, "grad_norm": 0.12787501513957977, "learning_rate": 1e-06, "loss": -0.0006, "step": 130 }, { "clip_ratio/high_max": 0.0021150591092009563, "clip_ratio/high_mean": 0.0008707102069820394, "clip_ratio/low_mean": 0.0007152069083531387, "clip_ratio/low_min": 4.3578196709859185e-05, "clip_ratio/region_mean": 0.0015859171362535562, "epoch": 0.30562846310877806, "grad_norm": 0.12346763163805008, "learning_rate": 1e-06, "loss": -0.0008, "step": 131 }, { "clip_ratio/high_max": 0.002172308690205682, "clip_ratio/high_mean": 0.0008703252351551782, "clip_ratio/low_mean": 0.0008561224449294969, "clip_ratio/low_min": 6.992353792156791e-05, "clip_ratio/region_mean": 0.0017264476628042758, "epoch": 0.3079615048118985, "grad_norm": 0.12661373615264893, "learning_rate": 1e-06, "loss": -0.0008, "step": 132 }, { "clip_ratio/high_max": 0.0016295224777422845, "clip_ratio/high_mean": 0.0006557831056852592, "clip_ratio/low_mean": 0.0005799791524623288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012357622545096092, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3400.0, "completions/mean_length": 681.9006958007812, "completions/mean_terminated_length": 615.8713989257812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.310294546515019, "grad_norm": 0.12366899102926254, "learning_rate": 1e-06, "loss": 0.0032, "num_tokens": 20462780.0, "reward": 0.5725446939468384, "reward_std": 0.18144823610782623, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 133 }, { "clip_ratio/high_max": 0.0019445515772531508, "clip_ratio/high_mean": 0.0007925075506136636, "clip_ratio/low_mean": 0.0006260709888010751, "clip_ratio/low_min": 2.2393407562049106e-05, "clip_ratio/region_mean": 0.0014185785184963606, "epoch": 0.3126275882181394, "grad_norm": 0.12053171545267105, "learning_rate": 1e-06, "loss": 0.0031, "step": 134 }, { "clip_ratio/high_max": 0.002057051740848692, "clip_ratio/high_mean": 0.000796252378677309, "clip_ratio/low_mean": 0.0006900128737470368, "clip_ratio/low_min": 2.3946360670379363e-05, "clip_ratio/region_mean": 0.0014862652569718193, "epoch": 0.31496062992125984, "grad_norm": 0.11586639285087585, "learning_rate": 1e-06, "loss": 0.003, "step": 135 }, { "clip_ratio/high_max": 0.00205574548090226, "clip_ratio/high_mean": 0.0008225631409004563, "clip_ratio/low_mean": 0.0007366543359239586, "clip_ratio/low_min": 2.2393407562049106e-05, "clip_ratio/region_mean": 0.0015592174604535103, "epoch": 0.31729367162438027, "grad_norm": 0.11688009649515152, "learning_rate": 1e-06, "loss": 0.0029, "step": 136 }, { "clip_ratio/high_max": 0.002024527595494874, "clip_ratio/high_mean": 0.0008082161057245685, "clip_ratio/low_mean": 0.0005800189078399853, "clip_ratio/low_min": 1.8047934645437635e-05, "clip_ratio/region_mean": 0.0013882350067433435, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 740.0647583007812, "completions/mean_terminated_length": 627.8131103515625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.31962671332750076, "grad_norm": 0.13833001255989075, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 21090750.0, "reward": 0.6171875, "reward_std": 0.19227458536624908, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 137 }, { "clip_ratio/high_max": 0.0022252232884056866, "clip_ratio/high_mean": 0.0008493184159306111, "clip_ratio/low_mean": 0.0006523041329273838, "clip_ratio/low_min": 2.575105281721335e-05, "clip_ratio/region_mean": 0.0015016225625004154, "epoch": 0.3219597550306212, "grad_norm": 0.1322672963142395, "learning_rate": 1e-06, "loss": -0.0079, "step": 138 }, { "clip_ratio/high_max": 0.0025541731010889634, "clip_ratio/high_mean": 0.0009663519540481502, "clip_ratio/low_mean": 0.0007721306756138802, "clip_ratio/low_min": 4.925636767438846e-05, "clip_ratio/region_mean": 0.0017384826496709138, "epoch": 0.3242927967337416, "grad_norm": 0.13279646635055542, "learning_rate": 1e-06, "loss": -0.0081, "step": 139 }, { "clip_ratio/high_max": 0.0024515801342204213, "clip_ratio/high_mean": 0.0009114635831792839, "clip_ratio/low_mean": 0.0007970291981109767, "clip_ratio/low_min": 8.409956171817612e-05, "clip_ratio/region_mean": 0.0017084927821997553, "epoch": 0.32662583843686205, "grad_norm": 0.13724446296691895, "learning_rate": 1e-06, "loss": -0.0081, "step": 140 }, { "clip_ratio/high_max": 0.0021759426381322555, "clip_ratio/high_mean": 0.0009506453116046032, "clip_ratio/low_mean": 0.0007584911727462895, "clip_ratio/low_min": 6.67031317789224e-05, "clip_ratio/region_mean": 0.0017091364570660517, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3260.0, "completions/mean_length": 750.388427734375, "completions/mean_terminated_length": 642.4654541015625, "completions/min_length": 200.0, "completions/min_terminated_length": 200.0, "epoch": 0.3289588801399825, "grad_norm": 0.14134296774864197, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 21741234.0, "reward": 0.5714285969734192, "reward_std": 0.2296549528837204, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 141 }, { "clip_ratio/high_max": 0.0024316191629623063, "clip_ratio/high_mean": 0.0009980091144825565, "clip_ratio/low_mean": 0.0008398861700698035, "clip_ratio/low_min": 2.4851598027453292e-05, "clip_ratio/region_mean": 0.0018378952809143811, "epoch": 0.33129192184310297, "grad_norm": 0.1454172432422638, "learning_rate": 1e-06, "loss": -0.0085, "step": 142 }, { "clip_ratio/high_max": 0.0026167565447394736, "clip_ratio/high_mean": 0.0010993376454280224, "clip_ratio/low_mean": 0.0009133251132880105, "clip_ratio/low_min": 9.388228863826953e-05, "clip_ratio/region_mean": 0.002012662727793213, "epoch": 0.3336249635462234, "grad_norm": 0.13567112386226654, "learning_rate": 1e-06, "loss": -0.0086, "step": 143 }, { "clip_ratio/high_max": 0.0025247305602533743, "clip_ratio/high_mean": 0.001162609776656609, "clip_ratio/low_mean": 0.0009580292135069612, "clip_ratio/low_min": 2.1999296222929843e-05, "clip_ratio/region_mean": 0.002120639015629422, "epoch": 0.3359580052493438, "grad_norm": 0.1326664388179779, "learning_rate": 1e-06, "loss": -0.0087, "step": 144 }, { "clip_ratio/high_max": 0.0018576670699985698, "clip_ratio/high_mean": 0.0008139792898873566, "clip_ratio/low_mean": 0.000670039120450383, "clip_ratio/low_min": 2.8118673071730882e-05, "clip_ratio/region_mean": 0.0014840184157947078, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2907.0, "completions/mean_length": 681.7734375, "completions/mean_terminated_length": 587.8038940429688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.33829104695246426, "grad_norm": 0.14912009239196777, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 22330527.0, "reward": 0.5758928656578064, "reward_std": 0.21725811064243317, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 145 }, { "clip_ratio/high_max": 0.0025272179118474014, "clip_ratio/high_mean": 0.0010477910109329969, "clip_ratio/low_mean": 0.0007653703396499623, "clip_ratio/low_min": 5.663632146024611e-05, "clip_ratio/region_mean": 0.0018131613687728532, "epoch": 0.34062408865558474, "grad_norm": 0.14827348291873932, "learning_rate": 1e-06, "loss": -0.0072, "step": 146 }, { "clip_ratio/high_max": 0.002495536347851157, "clip_ratio/high_mean": 0.0010808328224811703, "clip_ratio/low_mean": 0.0009262939474865561, "clip_ratio/low_min": 3.505966014927253e-05, "clip_ratio/region_mean": 0.0020071267208550125, "epoch": 0.3429571303587052, "grad_norm": 0.1410999745130539, "learning_rate": 1e-06, "loss": -0.0073, "step": 147 }, { "clip_ratio/high_max": 0.0025222120893886313, "clip_ratio/high_mean": 0.0010745862746261992, "clip_ratio/low_mean": 0.0010007284636230906, "clip_ratio/low_min": 9.22423450901988e-05, "clip_ratio/region_mean": 0.0020753147182404064, "epoch": 0.3452901720618256, "grad_norm": 0.13900841772556305, "learning_rate": 1e-06, "loss": -0.0073, "step": 148 }, { "clip_ratio/high_max": 0.001856691567809321, "clip_ratio/high_mean": 0.0007320679142139852, "clip_ratio/low_mean": 0.00043941574040218256, "clip_ratio/low_min": 1.2037750821036752e-05, "clip_ratio/region_mean": 0.001171483687357977, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2857.0, "completions/mean_length": 756.950927734375, "completions/mean_terminated_length": 600.9205322265625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.34762321376494604, "grad_norm": 0.131646066904068, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 22934251.0, "reward": 0.5881696939468384, "reward_std": 0.1741565614938736, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 149 }, { "clip_ratio/high_max": 0.0019837734871543944, "clip_ratio/high_mean": 0.000780138074333081, "clip_ratio/low_mean": 0.0005641352909151465, "clip_ratio/low_min": 1.2037750821036752e-05, "clip_ratio/region_mean": 0.0013442733616102487, "epoch": 0.34995625546806647, "grad_norm": 0.1266884207725525, "learning_rate": 1e-06, "loss": -0.0147, "step": 150 }, { "clip_ratio/high_max": 0.0021597502782242373, "clip_ratio/high_mean": 0.0008289266588690225, "clip_ratio/low_mean": 0.0006222497831913643, "clip_ratio/low_min": 1.230072848557029e-05, "clip_ratio/region_mean": 0.0014511764529743232, "epoch": 0.35228929717118695, "grad_norm": 0.12174595147371292, "learning_rate": 1e-06, "loss": -0.0148, "step": 151 }, { "clip_ratio/high_max": 0.002154038018488791, "clip_ratio/high_mean": 0.0008754699047130998, "clip_ratio/low_mean": 0.0007365460114669986, "clip_ratio/low_min": 2.460145697114058e-05, "clip_ratio/region_mean": 0.00161201594164595, "epoch": 0.3546223388743074, "grad_norm": 0.12020301818847656, "learning_rate": 1e-06, "loss": -0.0148, "step": 152 }, { "clip_ratio/high_max": 0.0022428504744311795, "clip_ratio/high_mean": 0.0008285467338282615, "clip_ratio/low_mean": 0.0007076413589857111, "clip_ratio/low_min": 1.1980065210082103e-05, "clip_ratio/region_mean": 0.0015361880941782147, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 724.7801513671875, "completions/mean_terminated_length": 599.920166015625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.3569553805774278, "grad_norm": 0.1613074690103531, "learning_rate": 1e-06, "loss": -0.016, "num_tokens": 23548446.0, "reward": 0.5613839626312256, "reward_std": 0.19839511811733246, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 153 }, { "clip_ratio/high_max": 0.002339055143238511, "clip_ratio/high_mean": 0.0009058997620741138, "clip_ratio/low_mean": 0.0006424031689675758, "clip_ratio/low_min": 4.490345145313768e-05, "clip_ratio/region_mean": 0.001548302905575838, "epoch": 0.35928842228054825, "grad_norm": 0.15127702057361603, "learning_rate": 1e-06, "loss": -0.0161, "step": 154 }, { "clip_ratio/high_max": 0.0029553086278610863, "clip_ratio/high_mean": 0.0010807084327097982, "clip_ratio/low_mean": 0.0008939045674196677, "clip_ratio/low_min": 3.684923558466835e-05, "clip_ratio/region_mean": 0.001974613027414307, "epoch": 0.36162146398366873, "grad_norm": 0.1411028504371643, "learning_rate": 1e-06, "loss": -0.0163, "step": 155 }, { "clip_ratio/high_max": 0.0028567789049702697, "clip_ratio/high_mean": 0.0010759717879409436, "clip_ratio/low_mean": 0.0010138653688045451, "clip_ratio/low_min": 8.965483993961243e-05, "clip_ratio/region_mean": 0.002089837238600012, "epoch": 0.36395450568678916, "grad_norm": 0.13891728222370148, "learning_rate": 1e-06, "loss": -0.0164, "step": 156 }, { "clip_ratio/high_max": 0.0019052833595196716, "clip_ratio/high_mean": 0.0007558979359600926, "clip_ratio/low_mean": 0.0005618926061288221, "clip_ratio/low_min": 1.3661202501680236e-05, "clip_ratio/region_mean": 0.0013177905493648723, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3188.0, "completions/mean_length": 712.7745971679688, "completions/mean_terminated_length": 607.6571044921875, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 0.3662875473899096, "grad_norm": 0.1499905288219452, "learning_rate": 1e-06, "loss": -0.0182, "num_tokens": 24178852.0, "reward": 0.5625, "reward_std": 0.18693754076957703, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 157 }, { "clip_ratio/high_max": 0.0022266593587119132, "clip_ratio/high_mean": 0.0008535192591807572, "clip_ratio/low_mean": 0.0005987178856230457, "clip_ratio/low_min": 7.914327306934865e-05, "clip_ratio/region_mean": 0.0014522371420753188, "epoch": 0.36862058909303, "grad_norm": 0.14097832143306732, "learning_rate": 1e-06, "loss": -0.0184, "step": 158 }, { "clip_ratio/high_max": 0.0020058465452166274, "clip_ratio/high_mean": 0.0008499585565004963, "clip_ratio/low_mean": 0.0007910596796136815, "clip_ratio/low_min": 9.284633597417269e-05, "clip_ratio/region_mean": 0.001641018214286305, "epoch": 0.37095363079615046, "grad_norm": 0.13096092641353607, "learning_rate": 1e-06, "loss": -0.0185, "step": 159 }, { "clip_ratio/high_max": 0.0022000707976985723, "clip_ratio/high_mean": 0.0009491592900303658, "clip_ratio/low_mean": 0.0007415883028443204, "clip_ratio/low_min": 8.971292754722526e-05, "clip_ratio/region_mean": 0.0016907475728658028, "epoch": 0.37328667249927094, "grad_norm": 0.13571088016033173, "learning_rate": 1e-06, "loss": -0.0186, "step": 160 }, { "clip_ratio/high_max": 0.0017179410206153989, "clip_ratio/high_mean": 0.0006355128489303752, "clip_ratio/low_mean": 0.0005784768000012264, "clip_ratio/low_min": 2.933319137810031e-05, "clip_ratio/region_mean": 0.0012139896389271598, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 831.2489013671875, "completions/mean_terminated_length": 654.5682373046875, "completions/min_length": 206.0, "completions/min_terminated_length": 206.0, "epoch": 0.3756197142023914, "grad_norm": 0.13254326581954956, "learning_rate": 1e-06, "loss": -0.0197, "num_tokens": 24837019.0, "reward": 0.5212053656578064, "reward_std": 0.1868615597486496, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 161 }, { "clip_ratio/high_max": 0.002329044058569707, "clip_ratio/high_mean": 0.0008827270321489777, "clip_ratio/low_mean": 0.0006735311708325753, "clip_ratio/low_min": 1.187310044770129e-05, "clip_ratio/region_mean": 0.0015562582266284153, "epoch": 0.3779527559055118, "grad_norm": 0.1271568387746811, "learning_rate": 1e-06, "loss": -0.0199, "step": 162 }, { "clip_ratio/high_max": 0.0021151488945179153, "clip_ratio/high_mean": 0.0008055039343162207, "clip_ratio/low_mean": 0.0008194172060029814, "clip_ratio/low_min": 6.695355841657147e-05, "clip_ratio/region_mean": 0.0016249211257672869, "epoch": 0.38028579760863224, "grad_norm": 0.12323304265737534, "learning_rate": 1e-06, "loss": -0.02, "step": 163 }, { "clip_ratio/high_max": 0.0023609396230312996, "clip_ratio/high_mean": 0.0008703189359948738, "clip_ratio/low_mean": 0.0008447565996902995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017150755229522474, "epoch": 0.3826188393117527, "grad_norm": 0.11949825286865234, "learning_rate": 1e-06, "loss": -0.02, "step": 164 }, { "clip_ratio/high_max": 0.0016120658874569926, "clip_ratio/high_mean": 0.0006476519338320941, "clip_ratio/low_mean": 0.0004724546288343845, "clip_ratio/low_min": 2.183024844271131e-05, "clip_ratio/region_mean": 0.0011201065572095104, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3809.0, "completions/mean_length": 746.591552734375, "completions/mean_terminated_length": 622.5393676757812, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 0.38495188101487315, "grad_norm": 0.1378088742494583, "learning_rate": 1e-06, "loss": -0.0108, "num_tokens": 25466093.0, "reward": 0.53125, "reward_std": 0.17765209078788757, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 165 }, { "clip_ratio/high_max": 0.0019100851532130037, "clip_ratio/high_mean": 0.0007693097859373665, "clip_ratio/low_mean": 0.0006120883754192619, "clip_ratio/low_min": 1.1038502634619363e-05, "clip_ratio/region_mean": 0.0013813981604471337, "epoch": 0.3872849227179936, "grad_norm": 0.13167965412139893, "learning_rate": 1e-06, "loss": -0.0109, "step": 166 }, { "clip_ratio/high_max": 0.0020490221177169587, "clip_ratio/high_mean": 0.0007186526363511803, "clip_ratio/low_mean": 0.0005681742277374724, "clip_ratio/low_min": 1.5794794308021665e-05, "clip_ratio/region_mean": 0.0012868268458987586, "epoch": 0.389617964421114, "grad_norm": 0.13167418539524078, "learning_rate": 1e-06, "loss": -0.011, "step": 167 }, { "clip_ratio/high_max": 0.0022456580627476797, "clip_ratio/high_mean": 0.0008425064788752934, "clip_ratio/low_mean": 0.000783710835094098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001626217275770614, "epoch": 0.39195100612423445, "grad_norm": 0.1238115131855011, "learning_rate": 1e-06, "loss": -0.0111, "step": 168 }, { "clip_ratio/high_max": 0.002102460159221664, "clip_ratio/high_mean": 0.0007984250751178479, "clip_ratio/low_mean": 0.0005976143966108793, "clip_ratio/low_min": 1.253258506039856e-05, "clip_ratio/region_mean": 0.0013960394826426636, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3499.0, "completions/mean_length": 747.599365234375, "completions/mean_terminated_length": 635.5997314453125, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.39428404782735493, "grad_norm": 0.14102819561958313, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 26103294.0, "reward": 0.6015625, "reward_std": 0.20320719480514526, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 169 }, { "clip_ratio/high_max": 0.002048708603979321, "clip_ratio/high_mean": 0.000856896496770787, "clip_ratio/low_mean": 0.0007294035949598765, "clip_ratio/low_min": 2.252387821499724e-05, "clip_ratio/region_mean": 0.0015863000808167271, "epoch": 0.39661708953047536, "grad_norm": 0.13521471619606018, "learning_rate": 1e-06, "loss": 0.0136, "step": 170 }, { "clip_ratio/high_max": 0.0022176205384312198, "clip_ratio/high_mean": 0.0008204550431401003, "clip_ratio/low_mean": 0.00080624979455024, "clip_ratio/low_min": 1.025935671350453e-05, "clip_ratio/region_mean": 0.0016267048486042768, "epoch": 0.3989501312335958, "grad_norm": 0.132305309176445, "learning_rate": 1e-06, "loss": 0.0135, "step": 171 }, { "clip_ratio/high_max": 0.002695338276680559, "clip_ratio/high_mean": 0.0010746097032097168, "clip_ratio/low_mean": 0.0008721355006855447, "clip_ratio/low_min": 3.679356450447813e-05, "clip_ratio/region_mean": 0.0019467451784294099, "epoch": 0.4012831729367162, "grad_norm": 0.12492279708385468, "learning_rate": 1e-06, "loss": 0.0134, "step": 172 }, { "clip_ratio/high_max": 0.0018454058672432438, "clip_ratio/high_mean": 0.0007141997180042381, "clip_ratio/low_mean": 0.0004877425571976346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012019422865705565, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2615.0, "completions/mean_length": 666.3002319335938, "completions/mean_terminated_length": 567.8587646484375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 0.4036162146398367, "grad_norm": 0.14558936655521393, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 26684091.0, "reward": 0.6194196939468384, "reward_std": 0.17836888134479523, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 173 }, { "clip_ratio/high_max": 0.0020503912965068594, "clip_ratio/high_mean": 0.0008501818829245167, "clip_ratio/low_mean": 0.0005826316228194628, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001432813543942757, "epoch": 0.40594925634295714, "grad_norm": 0.13812094926834106, "learning_rate": 1e-06, "loss": -0.0113, "step": 174 }, { "clip_ratio/high_max": 0.0021088762587169185, "clip_ratio/high_mean": 0.000818512820842443, "clip_ratio/low_mean": 0.0007027353531157132, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001521248159406241, "epoch": 0.4082822980460776, "grad_norm": 0.13025124371051788, "learning_rate": 1e-06, "loss": -0.0114, "step": 175 }, { "clip_ratio/high_max": 0.0024314813636010513, "clip_ratio/high_mean": 0.0009654291134211235, "clip_ratio/low_mean": 0.0008212085613195086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017866376874735579, "epoch": 0.410615339749198, "grad_norm": 0.1292300820350647, "learning_rate": 1e-06, "loss": -0.0115, "step": 176 }, { "clip_ratio/high_max": 0.0019029411487281322, "clip_ratio/high_mean": 0.0007594223316118587, "clip_ratio/low_mean": 0.0005212702262724633, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001280692555155838, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3785.0, "completions/mean_length": 846.482177734375, "completions/mean_terminated_length": 650.357421875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.41294838145231844, "grad_norm": 0.14513805508613586, "learning_rate": 1e-06, "loss": -0.0296, "num_tokens": 27327035.0, "reward": 0.5915178656578064, "reward_std": 0.195994034409523, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 177 }, { "clip_ratio/high_max": 0.0019132906018057838, "clip_ratio/high_mean": 0.0008263411073130555, "clip_ratio/low_mean": 0.0006613379118789453, "clip_ratio/low_min": 4.798547797690844e-05, "clip_ratio/region_mean": 0.0014876790010021068, "epoch": 0.4152814231554389, "grad_norm": 0.13300026953220367, "learning_rate": 1e-06, "loss": -0.0297, "step": 178 }, { "clip_ratio/high_max": 0.002202157396823168, "clip_ratio/high_mean": 0.0009413544539711438, "clip_ratio/low_mean": 0.0007774288296786835, "clip_ratio/low_min": 2.999307071149815e-05, "clip_ratio/region_mean": 0.0017187832781928591, "epoch": 0.41761446485855935, "grad_norm": 0.13014237582683563, "learning_rate": 1e-06, "loss": -0.0298, "step": 179 }, { "clip_ratio/high_max": 0.0022671789629384875, "clip_ratio/high_mean": 0.0009622640500310808, "clip_ratio/low_mean": 0.0008495632300764555, "clip_ratio/low_min": 6.966615637793439e-05, "clip_ratio/region_mean": 0.0018118272419087589, "epoch": 0.4199475065616798, "grad_norm": 0.127271369099617, "learning_rate": 1e-06, "loss": -0.0299, "step": 180 }, { "clip_ratio/high_max": 0.0017390164030075539, "clip_ratio/high_mean": 0.000820291770651238, "clip_ratio/low_mean": 0.0005455044865811942, "clip_ratio/low_min": 1.134713147621369e-05, "clip_ratio/region_mean": 0.0013657962263096124, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3679.0, "completions/mean_length": 823.5189819335938, "completions/mean_terminated_length": 682.5623168945312, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.4222805482648002, "grad_norm": 0.13623927533626556, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 28001804.0, "reward": 0.5691964626312256, "reward_std": 0.20613498985767365, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 181 }, { "clip_ratio/high_max": 0.0019057195422647055, "clip_ratio/high_mean": 0.0009002240440167952, "clip_ratio/low_mean": 0.0007010096378508024, "clip_ratio/low_min": 2.528395452827681e-05, "clip_ratio/region_mean": 0.0016012336964195129, "epoch": 0.4246135899679207, "grad_norm": 0.1361505389213562, "learning_rate": 1e-06, "loss": -0.0082, "step": 182 }, { "clip_ratio/high_max": 0.0020468775073823053, "clip_ratio/high_mean": 0.000950065310462378, "clip_ratio/low_mean": 0.0007298784039448947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001679943688941421, "epoch": 0.42694663167104113, "grad_norm": 0.16369010508060455, "learning_rate": 1e-06, "loss": -0.0083, "step": 183 }, { "clip_ratio/high_max": 0.0023009092692518607, "clip_ratio/high_mean": 0.0010173881219088798, "clip_ratio/low_mean": 0.0009192527759296354, "clip_ratio/low_min": 2.4488183044013567e-05, "clip_ratio/region_mean": 0.0019366408450878225, "epoch": 0.42927967337416156, "grad_norm": 0.12531021237373352, "learning_rate": 1e-06, "loss": -0.0084, "step": 184 }, { "clip_ratio/high_max": 0.001886182013549842, "clip_ratio/high_mean": 0.0008456229570583673, "clip_ratio/low_mean": 0.0006428303222492104, "clip_ratio/low_min": 4.2322684748796746e-05, "clip_ratio/region_mean": 0.0014884532793075778, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 827.0301513671875, "completions/mean_terminated_length": 662.2402954101562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.431612715077282, "grad_norm": 0.15010322630405426, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 28651591.0, "reward": 0.5803571939468384, "reward_std": 0.21241775155067444, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 185 }, { "clip_ratio/high_max": 0.0018301246491319034, "clip_ratio/high_mean": 0.0008700616945134243, "clip_ratio/low_mean": 0.0007397959761874517, "clip_ratio/low_min": 4.726601855509216e-05, "clip_ratio/region_mean": 0.001609857652510982, "epoch": 0.4339457567804024, "grad_norm": 0.14428773522377014, "learning_rate": 1e-06, "loss": -0.0114, "step": 186 }, { "clip_ratio/high_max": 0.0021695718314731494, "clip_ratio/high_mean": 0.0010226818194496445, "clip_ratio/low_mean": 0.0008679816110088723, "clip_ratio/low_min": 5.975446765660308e-05, "clip_ratio/region_mean": 0.0018906634286395274, "epoch": 0.4362787984835229, "grad_norm": 0.14368951320648193, "learning_rate": 1e-06, "loss": -0.0115, "step": 187 }, { "clip_ratio/high_max": 0.0022048446626286022, "clip_ratio/high_mean": 0.0010107983544003218, "clip_ratio/low_mean": 0.0009332002828159602, "clip_ratio/low_min": 0.00012687695470958715, "clip_ratio/region_mean": 0.001943998628121335, "epoch": 0.43861184018664334, "grad_norm": 0.13704383373260498, "learning_rate": 1e-06, "loss": -0.0116, "step": 188 }, { "clip_ratio/high_max": 0.0022271957059274428, "clip_ratio/high_mean": 0.0008861481364874635, "clip_ratio/low_mean": 0.0005503760521605727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014365241913765203, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2837.0, "completions/mean_length": 823.7098388671875, "completions/mean_terminated_length": 613.8480224609375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 0.4409448818897638, "grad_norm": 0.1536131203174591, "learning_rate": 1e-06, "loss": -0.0198, "num_tokens": 29263099.0, "reward": 0.5479910969734192, "reward_std": 0.1965574473142624, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 189 }, { "clip_ratio/high_max": 0.0025035149592440575, "clip_ratio/high_mean": 0.0010717627228586935, "clip_ratio/low_mean": 0.0006566193624166772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017283820998272859, "epoch": 0.4432779235928842, "grad_norm": 0.1450500190258026, "learning_rate": 1e-06, "loss": -0.0199, "step": 190 }, { "clip_ratio/high_max": 0.002502236246073153, "clip_ratio/high_mean": 0.0010475442395545542, "clip_ratio/low_mean": 0.0008652243286633166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019127685518469661, "epoch": 0.4456109652960047, "grad_norm": 0.14346785843372345, "learning_rate": 1e-06, "loss": -0.0201, "step": 191 }, { "clip_ratio/high_max": 0.0028825998524553142, "clip_ratio/high_mean": 0.0011423403193475679, "clip_ratio/low_mean": 0.0009022599560921662, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002044600318185985, "epoch": 0.4479440069991251, "grad_norm": 0.13926590979099274, "learning_rate": 1e-06, "loss": -0.0202, "step": 192 }, { "clip_ratio/high_max": 0.0017455844244977925, "clip_ratio/high_mean": 0.0007080485738697462, "clip_ratio/low_mean": 0.0004908380342385499, "clip_ratio/low_min": 1.2672343473241199e-05, "clip_ratio/region_mean": 0.0011988865880994126, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3357.0, "completions/mean_length": 821.0848388671875, "completions/mean_terminated_length": 631.6268920898438, "completions/min_length": 209.0, "completions/min_terminated_length": 209.0, "epoch": 0.45027704870224555, "grad_norm": 0.15021604299545288, "learning_rate": 1e-06, "loss": -0.0227, "num_tokens": 29881103.0, "reward": 0.527901828289032, "reward_std": 0.17585225403308868, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 193 }, { "clip_ratio/high_max": 0.002242021197162103, "clip_ratio/high_mean": 0.000786731641710503, "clip_ratio/low_mean": 0.0006091638706493541, "clip_ratio/low_min": 5.467925439006649e-05, "clip_ratio/region_mean": 0.0013958955023554154, "epoch": 0.452610090405366, "grad_norm": 0.13321596384048462, "learning_rate": 1e-06, "loss": -0.0229, "step": 194 }, { "clip_ratio/high_max": 0.002216878390754573, "clip_ratio/high_mean": 0.0008677324040036183, "clip_ratio/low_mean": 0.0007575850731882383, "clip_ratio/low_min": 2.8709254365821835e-05, "clip_ratio/region_mean": 0.001625317454454489, "epoch": 0.4549431321084864, "grad_norm": 0.12558101117610931, "learning_rate": 1e-06, "loss": -0.0231, "step": 195 }, { "clip_ratio/high_max": 0.0021615528021357022, "clip_ratio/high_mean": 0.0008111449260468362, "clip_ratio/low_mean": 0.0008866158423188608, "clip_ratio/low_min": 6.265901265578577e-05, "clip_ratio/region_mean": 0.0016977607738226652, "epoch": 0.4572761738116069, "grad_norm": 0.12740087509155273, "learning_rate": 1e-06, "loss": -0.0231, "step": 196 }, { "clip_ratio/high_max": 0.002232287883089157, "clip_ratio/high_mean": 0.0009673337535787141, "clip_ratio/low_mean": 0.0005202810407354264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001487614783400204, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 888.4888916015625, "completions/mean_terminated_length": 616.6658935546875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.45960921551472733, "grad_norm": 0.15869812667369843, "learning_rate": 1e-06, "loss": -0.0224, "num_tokens": 30493005.0, "reward": 0.5814732313156128, "reward_std": 0.191481813788414, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 197 }, { "clip_ratio/high_max": 0.0022578981042897794, "clip_ratio/high_mean": 0.001041828054439975, "clip_ratio/low_mean": 0.0005890234124308336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016308514786942396, "epoch": 0.46194225721784776, "grad_norm": 0.140419140458107, "learning_rate": 1e-06, "loss": -0.0225, "step": 198 }, { "clip_ratio/high_max": 0.0025456565563217737, "clip_ratio/high_mean": 0.0011061316454288317, "clip_ratio/low_mean": 0.0007613937241330859, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001867525355919497, "epoch": 0.4642752989209682, "grad_norm": 0.1353747397661209, "learning_rate": 1e-06, "loss": -0.0226, "step": 199 }, { "clip_ratio/high_max": 0.0024561806458223145, "clip_ratio/high_mean": 0.0011150241025461582, "clip_ratio/low_mean": 0.0008858814271661686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020009055187983904, "epoch": 0.4666083406240887, "grad_norm": 0.13788971304893494, "learning_rate": 1e-06, "loss": -0.0228, "step": 200 }, { "clip_ratio/high_max": 0.0020815222633245867, "clip_ratio/high_mean": 0.0008058053899731021, "clip_ratio/low_mean": 0.0007494486671930645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015552540426142514, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3908.0, "completions/mean_length": 894.0234985351562, "completions/mean_terminated_length": 647.7175903320312, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.4689413823272091, "grad_norm": 0.16608601808547974, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 31113114.0, "reward": 0.606026828289032, "reward_std": 0.19621339440345764, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 201 }, { "clip_ratio/high_max": 0.0024526705165044405, "clip_ratio/high_mean": 0.0009260617298423313, "clip_ratio/low_mean": 0.0008891882062016521, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001815249961509835, "epoch": 0.47127442403032954, "grad_norm": 0.15323638916015625, "learning_rate": 1e-06, "loss": -0.0031, "step": 202 }, { "clip_ratio/high_max": 0.0025142339000012726, "clip_ratio/high_mean": 0.0010169709958063322, "clip_ratio/low_mean": 0.0009712312239571474, "clip_ratio/low_min": 1.633986903470941e-05, "clip_ratio/region_mean": 0.0019882022024830803, "epoch": 0.47360746573345, "grad_norm": 0.14349707961082458, "learning_rate": 1e-06, "loss": -0.0033, "step": 203 }, { "clip_ratio/high_max": 0.002481605850334745, "clip_ratio/high_mean": 0.0010075547861561063, "clip_ratio/low_mean": 0.001105919905967312, "clip_ratio/low_min": 3.267973806941882e-05, "clip_ratio/region_mean": 0.0021134746784809977, "epoch": 0.4759405074365704, "grad_norm": 0.14359316229820251, "learning_rate": 1e-06, "loss": -0.0033, "step": 204 }, { "clip_ratio/high_max": 0.0021208442267379723, "clip_ratio/high_mean": 0.0008455073002551217, "clip_ratio/low_mean": 0.000589847798437404, "clip_ratio/low_min": 1.098997745430097e-05, "clip_ratio/region_mean": 0.0014353550868690945, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 836.6183471679688, "completions/mean_terminated_length": 611.0286865234375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.4782735491396909, "grad_norm": 0.1829909235239029, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 31704844.0, "reward": 0.6350446939468384, "reward_std": 0.17585085332393646, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 205 }, { "clip_ratio/high_max": 0.00222759220196167, "clip_ratio/high_mean": 0.0009623481091693975, "clip_ratio/low_mean": 0.0005849398330610711, "clip_ratio/low_min": 2.197995490860194e-05, "clip_ratio/region_mean": 0.0015472879240405746, "epoch": 0.4806065908428113, "grad_norm": 0.1492559164762497, "learning_rate": 1e-06, "loss": -0.0119, "step": 206 }, { "clip_ratio/high_max": 0.0026327979649067856, "clip_ratio/high_mean": 0.0010532176293054363, "clip_ratio/low_mean": 0.0006820712005719543, "clip_ratio/low_min": 2.197995490860194e-05, "clip_ratio/region_mean": 0.00173528883169638, "epoch": 0.48293963254593175, "grad_norm": 0.14454133808612823, "learning_rate": 1e-06, "loss": -0.0121, "step": 207 }, { "clip_ratio/high_max": 0.0027388943490223028, "clip_ratio/high_mean": 0.001097951564588584, "clip_ratio/low_mean": 0.0009181690720652114, "clip_ratio/low_min": 1.605858233233448e-05, "clip_ratio/region_mean": 0.002016120655753184, "epoch": 0.4852726742490522, "grad_norm": 0.13695617020130157, "learning_rate": 1e-06, "loss": -0.0122, "step": 208 }, { "clip_ratio/high_max": 0.0018147952578146942, "clip_ratio/high_mean": 0.0006773728041480354, "clip_ratio/low_mean": 0.0006203562052178313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012977289879927412, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3284.0, "completions/mean_length": 784.1239013671875, "completions/mean_terminated_length": 592.5277099609375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.48760571595217267, "grad_norm": 0.15552891790866852, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 32292467.0, "reward": 0.5848214626312256, "reward_std": 0.17130544781684875, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 209 }, { "clip_ratio/high_max": 0.0021525007468881086, "clip_ratio/high_mean": 0.0008598787881055614, "clip_ratio/low_mean": 0.0007220445304483292, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015819232976355124, "epoch": 0.4899387576552931, "grad_norm": 0.15443633496761322, "learning_rate": 1e-06, "loss": 0.0022, "step": 210 }, { "clip_ratio/high_max": 0.0023462451645173132, "clip_ratio/high_mean": 0.0009156435035038157, "clip_ratio/low_mean": 0.000828188593914092, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017438321083318442, "epoch": 0.49227179935841353, "grad_norm": 0.1460455358028412, "learning_rate": 1e-06, "loss": 0.002, "step": 211 }, { "clip_ratio/high_max": 0.002625642860948574, "clip_ratio/high_mean": 0.0009293550920119742, "clip_ratio/low_mean": 0.0009462321759201586, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018755872806650586, "epoch": 0.49460484106153396, "grad_norm": 0.1429179459810257, "learning_rate": 1e-06, "loss": 0.0019, "step": 212 }, { "clip_ratio/high_max": 0.0020073093110113405, "clip_ratio/high_mean": 0.000759914846639731, "clip_ratio/low_mean": 0.000462331128801452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012222459736221936, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3670.0, "completions/mean_length": 747.1752319335938, "completions/mean_terminated_length": 565.9447021484375, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.4969378827646544, "grad_norm": 0.15830470621585846, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 32863360.0, "reward": 0.621651828289032, "reward_std": 0.1690923273563385, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 213 }, { "clip_ratio/high_max": 0.0022311388893285766, "clip_ratio/high_mean": 0.000997254599496955, "clip_ratio/low_mean": 0.0006108485558797838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016081031499197707, "epoch": 0.4992709244677749, "grad_norm": 0.1517784148454666, "learning_rate": 1e-06, "loss": -0.0272, "step": 214 }, { "clip_ratio/high_max": 0.0021470280553330667, "clip_ratio/high_mean": 0.0008577115968364524, "clip_ratio/low_mean": 0.0007024550213827752, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001560166598210344, "epoch": 0.5016039661708953, "grad_norm": 0.13804104924201965, "learning_rate": 1e-06, "loss": -0.0273, "step": 215 }, { "clip_ratio/high_max": 0.0028051892440998927, "clip_ratio/high_mean": 0.0010142452265426982, "clip_ratio/low_mean": 0.0007994899351615459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018137351871700957, "epoch": 0.5039370078740157, "grad_norm": 0.13298538327217102, "learning_rate": 1e-06, "loss": -0.0274, "step": 216 }, { "clip_ratio/high_max": 0.001884418226836715, "clip_ratio/high_mean": 0.0008349796298716683, "clip_ratio/low_mean": 0.0004762308617500821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013112104643369094, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 732.818115234375, "completions/mean_terminated_length": 592.03369140625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.5062700495771362, "grad_norm": 0.15719586610794067, "learning_rate": 1e-06, "loss": -0.0224, "num_tokens": 33466901.0, "reward": 0.609375, "reward_std": 0.18490804731845856, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 217 }, { "clip_ratio/high_max": 0.0024248765403172, "clip_ratio/high_mean": 0.0009681946321506985, "clip_ratio/low_mean": 0.0005426817997431499, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001510876463726163, "epoch": 0.5086030912802566, "grad_norm": 0.17562437057495117, "learning_rate": 1e-06, "loss": -0.0226, "step": 218 }, { "clip_ratio/high_max": 0.0024607518353150226, "clip_ratio/high_mean": 0.0010107837515533902, "clip_ratio/low_mean": 0.000817063302747556, "clip_ratio/low_min": 1.133478417614242e-05, "clip_ratio/region_mean": 0.0018278470524819568, "epoch": 0.510936132983377, "grad_norm": 0.13347984850406647, "learning_rate": 1e-06, "loss": -0.0227, "step": 219 }, { "clip_ratio/high_max": 0.0026186130417045206, "clip_ratio/high_mean": 0.0010899415683525149, "clip_ratio/low_mean": 0.0009008191700559109, "clip_ratio/low_min": 2.7310465156915598e-05, "clip_ratio/region_mean": 0.0019907606911147013, "epoch": 0.5132691746864976, "grad_norm": 0.13431869447231293, "learning_rate": 1e-06, "loss": -0.0228, "step": 220 }, { "clip_ratio/high_max": 0.001676053161645541, "clip_ratio/high_mean": 0.0005422348167485325, "clip_ratio/low_mean": 0.0005290484946272045, "clip_ratio/low_min": 2.7246001081948634e-05, "clip_ratio/region_mean": 0.0010712833209254313, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 773.9029541015625, "completions/mean_terminated_length": 606.4349365234375, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.515602216389618, "grad_norm": 0.1542210876941681, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 34075598.0, "reward": 0.515625, "reward_std": 0.16360442340373993, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 221 }, { "clip_ratio/high_max": 0.0019124368445773143, "clip_ratio/high_mean": 0.0006477261977124726, "clip_ratio/low_mean": 0.000581258345846436, "clip_ratio/low_min": 3.816493881458882e-05, "clip_ratio/region_mean": 0.0012289845508348662, "epoch": 0.5179352580927384, "grad_norm": 0.14712415635585785, "learning_rate": 1e-06, "loss": -0.0054, "step": 222 }, { "clip_ratio/high_max": 0.002208602600148879, "clip_ratio/high_mean": 0.000756114433897892, "clip_ratio/low_mean": 0.0008703372177478741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016264516452793032, "epoch": 0.5202682997958589, "grad_norm": 0.13447299599647522, "learning_rate": 1e-06, "loss": -0.0056, "step": 223 }, { "clip_ratio/high_max": 0.002081741469737608, "clip_ratio/high_mean": 0.0007299962762772338, "clip_ratio/low_mean": 0.0009493385659880005, "clip_ratio/low_min": 4.9603173465584405e-05, "clip_ratio/region_mean": 0.0016793348258943297, "epoch": 0.5226013414989793, "grad_norm": 0.13474909961223602, "learning_rate": 1e-06, "loss": -0.0056, "step": 224 }, { "clip_ratio/high_max": 0.0017302923006354831, "clip_ratio/high_mean": 0.0007095045375535847, "clip_ratio/low_mean": 0.0005966503613308305, "clip_ratio/low_min": 3.647505218395963e-05, "clip_ratio/region_mean": 0.0013061549034318887, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2674.0, "completions/mean_length": 749.3158569335938, "completions/mean_terminated_length": 576.482421875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.5249343832020997, "grad_norm": 0.16695386171340942, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 34653513.0, "reward": 0.5892857313156128, "reward_std": 0.17799869179725647, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 225 }, { "clip_ratio/high_max": 0.002407783606031444, "clip_ratio/high_mean": 0.0008509271483490011, "clip_ratio/low_mean": 0.0007258261230163043, "clip_ratio/low_min": 1.885938399937004e-05, "clip_ratio/region_mean": 0.0015767532750032842, "epoch": 0.5272674249052202, "grad_norm": 0.1526087522506714, "learning_rate": 1e-06, "loss": -0.0158, "step": 226 }, { "clip_ratio/high_max": 0.0023025913833407685, "clip_ratio/high_mean": 0.00088137284910772, "clip_ratio/low_mean": 0.000892662950718659, "clip_ratio/low_min": 4.7148459998425096e-05, "clip_ratio/region_mean": 0.0017740357870934531, "epoch": 0.5296004666083406, "grad_norm": 0.14756852388381958, "learning_rate": 1e-06, "loss": -0.016, "step": 227 }, { "clip_ratio/high_max": 0.002523296330764424, "clip_ratio/high_mean": 0.000958762337177177, "clip_ratio/low_mean": 0.000998503339360468, "clip_ratio/low_min": 9.42969199968502e-06, "clip_ratio/region_mean": 0.001957265689270571, "epoch": 0.531933508311461, "grad_norm": 0.14524367451667786, "learning_rate": 1e-06, "loss": -0.0161, "step": 228 }, { "clip_ratio/high_max": 0.0017749884609656874, "clip_ratio/high_mean": 0.0006844742229077383, "clip_ratio/low_mean": 0.0005758627803515992, "clip_ratio/low_min": 2.6606637220538687e-05, "clip_ratio/region_mean": 0.0012603369905264117, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3527.0, "completions/mean_length": 783.2924194335938, "completions/mean_terminated_length": 628.4929809570312, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 0.5342665500145816, "grad_norm": 0.15385381877422333, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 35281367.0, "reward": 0.5524553656578064, "reward_std": 0.1795709878206253, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 229 }, { "clip_ratio/high_max": 0.002248025863082148, "clip_ratio/high_mean": 0.0007510431696573505, "clip_ratio/low_mean": 0.0006982658669585362, "clip_ratio/low_min": 4.0437347706756555e-05, "clip_ratio/region_mean": 0.0014493090311589185, "epoch": 0.536599591717702, "grad_norm": 0.14346085488796234, "learning_rate": 1e-06, "loss": -0.0203, "step": 230 }, { "clip_ratio/high_max": 0.002111926947691245, "clip_ratio/high_mean": 0.0009057822080649203, "clip_ratio/low_mean": 0.0007865216102800332, "clip_ratio/low_min": 3.9903771721583325e-05, "clip_ratio/region_mean": 0.001692303834715858, "epoch": 0.5389326334208224, "grad_norm": 0.13601410388946533, "learning_rate": 1e-06, "loss": -0.0203, "step": 231 }, { "clip_ratio/high_max": 0.0023360310660791583, "clip_ratio/high_mean": 0.0008537830599379959, "clip_ratio/low_mean": 0.0009508076145721134, "clip_ratio/low_min": 5.0289281716686673e-05, "clip_ratio/region_mean": 0.0018045906108454801, "epoch": 0.5412656751239429, "grad_norm": 0.13489511609077454, "learning_rate": 1e-06, "loss": -0.0204, "step": 232 }, { "clip_ratio/high_max": 0.0014880389171594288, "clip_ratio/high_mean": 0.00048186242929659784, "clip_ratio/low_mean": 0.00047997832007240504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009618407475500135, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3521.0, "completions/mean_length": 743.7467041015625, "completions/mean_terminated_length": 582.9953002929688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.5435987168270633, "grad_norm": 0.13865098357200623, "learning_rate": 1e-06, "loss": -0.0151, "num_tokens": 35873972.0, "reward": 0.6071428656578064, "reward_std": 0.11953012645244598, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 233 }, { "clip_ratio/high_max": 0.0019240232759329956, "clip_ratio/high_mean": 0.0006175566004458233, "clip_ratio/low_mean": 0.0005685450960299931, "clip_ratio/low_min": 4.293565507396124e-05, "clip_ratio/region_mean": 0.0011861016937473323, "epoch": 0.5459317585301837, "grad_norm": 0.13254310190677643, "learning_rate": 1e-06, "loss": -0.0152, "step": 234 }, { "clip_ratio/high_max": 0.0022041650518076494, "clip_ratio/high_mean": 0.0006964148251427105, "clip_ratio/low_mean": 0.0007112952880561352, "clip_ratio/low_min": 5.803156818728894e-05, "clip_ratio/region_mean": 0.0014077101077418774, "epoch": 0.5482648002333042, "grad_norm": 0.12212701886892319, "learning_rate": 1e-06, "loss": -0.0154, "step": 235 }, { "clip_ratio/high_max": 0.0022669806421617977, "clip_ratio/high_mean": 0.0007154976665333379, "clip_ratio/low_mean": 0.0007446610470651649, "clip_ratio/low_min": 4.724111931864172e-05, "clip_ratio/region_mean": 0.0014601587245124392, "epoch": 0.5505978419364246, "grad_norm": 0.1242891252040863, "learning_rate": 1e-06, "loss": -0.0154, "step": 236 }, { "clip_ratio/high_max": 0.001739940886182012, "clip_ratio/high_mean": 0.0006690709469694411, "clip_ratio/low_mean": 0.0006471121196227614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013161830611352343, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3510.0, "completions/mean_length": 718.7522583007812, "completions/mean_terminated_length": 585.5429077148438, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 0.552930883639545, "grad_norm": 0.19878889620304108, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 36463718.0, "reward": 0.6283482313156128, "reward_std": 0.16938656568527222, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 237 }, { "clip_ratio/high_max": 0.002143624980817549, "clip_ratio/high_mean": 0.0008554658179491526, "clip_ratio/low_mean": 0.0008386970785068115, "clip_ratio/low_min": 5.576920466410229e-05, "clip_ratio/region_mean": 0.00169416287826607, "epoch": 0.5552639253426656, "grad_norm": 0.15399928390979767, "learning_rate": 1e-06, "loss": -0.0, "step": 238 }, { "clip_ratio/high_max": 0.0023502816547988914, "clip_ratio/high_mean": 0.0009067850733117666, "clip_ratio/low_mean": 0.0009722007471282268, "clip_ratio/low_min": 5.104478077555541e-05, "clip_ratio/region_mean": 0.0018789857713272795, "epoch": 0.557596967045786, "grad_norm": 0.15232457220554352, "learning_rate": 1e-06, "loss": -0.0001, "step": 239 }, { "clip_ratio/high_max": 0.0023724731108814012, "clip_ratio/high_mean": 0.0009244451975973789, "clip_ratio/low_mean": 0.0010867376477108337, "clip_ratio/low_min": 4.5410639359033667e-05, "clip_ratio/region_mean": 0.0020111828271183185, "epoch": 0.5599300087489064, "grad_norm": 0.14790889620780945, "learning_rate": 1e-06, "loss": -0.0002, "step": 240 }, { "clip_ratio/high_max": 0.0016735052049625665, "clip_ratio/high_mean": 0.0006779307495889952, "clip_ratio/low_mean": 0.0006132294238341274, "clip_ratio/low_min": 2.033725832006894e-05, "clip_ratio/region_mean": 0.0012911601916130167, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2889.0, "completions/mean_length": 889.2645263671875, "completions/mean_terminated_length": 650.8741455078125, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 0.5622630504520268, "grad_norm": 0.16441886126995087, "learning_rate": 1e-06, "loss": -0.0141, "num_tokens": 37090571.0, "reward": 0.5691964626312256, "reward_std": 0.18403972685337067, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 241 }, { "clip_ratio/high_max": 0.0018577990049379878, "clip_ratio/high_mean": 0.0007666599976801081, "clip_ratio/low_mean": 0.0006816638942837017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001448323866497958, "epoch": 0.5645960921551473, "grad_norm": 0.14850129187107086, "learning_rate": 1e-06, "loss": -0.0142, "step": 242 }, { "clip_ratio/high_max": 0.002126586354279425, "clip_ratio/high_mean": 0.0009070793985301862, "clip_ratio/low_mean": 0.0008444239938398823, "clip_ratio/low_min": 2.9691153940802906e-05, "clip_ratio/region_mean": 0.0017515034051029943, "epoch": 0.5669291338582677, "grad_norm": 0.13421630859375, "learning_rate": 1e-06, "loss": -0.0144, "step": 243 }, { "clip_ratio/high_max": 0.002174347777327057, "clip_ratio/high_mean": 0.0008673456795804668, "clip_ratio/low_mean": 0.0009673706917965319, "clip_ratio/low_min": 7.436059331666911e-05, "clip_ratio/region_mean": 0.001834716422308702, "epoch": 0.5692621755613881, "grad_norm": 0.132176011800766, "learning_rate": 1e-06, "loss": -0.0145, "step": 244 }, { "clip_ratio/high_max": 0.0028683393175015226, "clip_ratio/high_mean": 0.0010127493369509466, "clip_ratio/low_mean": 0.00075934248707199, "clip_ratio/low_min": 1.3736264008912258e-05, "clip_ratio/region_mean": 0.0017720918040140532, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3895.0, "completions/mean_length": 881.755615234375, "completions/mean_terminated_length": 667.47265625, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.5715952172645086, "grad_norm": 0.18064840137958527, "learning_rate": 1e-06, "loss": -0.0202, "num_tokens": 37735216.0, "reward": 0.5412946939468384, "reward_std": 0.22469766438007355, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 245 }, { "clip_ratio/high_max": 0.002810091638821177, "clip_ratio/high_mean": 0.0011130381317343563, "clip_ratio/low_mean": 0.0009102133517444599, "clip_ratio/low_min": 4.440503107616678e-05, "clip_ratio/region_mean": 0.0020232514580129646, "epoch": 0.573928258967629, "grad_norm": 0.1686396300792694, "learning_rate": 1e-06, "loss": -0.0204, "step": 246 }, { "clip_ratio/high_max": 0.003302433338831179, "clip_ratio/high_mean": 0.0012588584941113368, "clip_ratio/low_mean": 0.0010801408207044005, "clip_ratio/low_min": 5.4901300245546736e-05, "clip_ratio/region_mean": 0.0023389992784359492, "epoch": 0.5762613006707495, "grad_norm": 0.16458149254322052, "learning_rate": 1e-06, "loss": -0.0206, "step": 247 }, { "clip_ratio/high_max": 0.0036235110528650694, "clip_ratio/high_mean": 0.0013374627342273016, "clip_ratio/low_mean": 0.0011865439810208045, "clip_ratio/low_min": 0.00012475728908611927, "clip_ratio/region_mean": 0.002524006675230339, "epoch": 0.57859434237387, "grad_norm": 0.15335099399089813, "learning_rate": 1e-06, "loss": -0.0207, "step": 248 }, { "clip_ratio/high_max": 0.002065000957372831, "clip_ratio/high_mean": 0.0008984474952740129, "clip_ratio/low_mean": 0.00064315901727241, "clip_ratio/low_min": 1.1287700544926338e-05, "clip_ratio/region_mean": 0.0015416065143654123, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3344.0, "completions/mean_length": 811.1272583007812, "completions/mean_terminated_length": 608.74169921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.5809273840769904, "grad_norm": 0.1811312586069107, "learning_rate": 1e-06, "loss": -0.0335, "num_tokens": 38327650.0, "reward": 0.6328125, "reward_std": 0.19808951020240784, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 249 }, { "clip_ratio/high_max": 0.002202488452894613, "clip_ratio/high_mean": 0.0009702885690785479, "clip_ratio/low_mean": 0.0008369570678041782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018072456441586837, "epoch": 0.5832604257801108, "grad_norm": 0.17485851049423218, "learning_rate": 1e-06, "loss": -0.0337, "step": 250 }, { "clip_ratio/high_max": 0.0023402113292831928, "clip_ratio/high_mean": 0.0010030769644799875, "clip_ratio/low_mean": 0.0009955549285223242, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019986318657174706, "epoch": 0.5855934674832313, "grad_norm": 0.15953102707862854, "learning_rate": 1e-06, "loss": -0.0338, "step": 251 }, { "clip_ratio/high_max": 0.002859046173398383, "clip_ratio/high_mean": 0.0012013425111945253, "clip_ratio/low_mean": 0.001164675115433056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023660176739213057, "epoch": 0.5879265091863517, "grad_norm": 0.15245676040649414, "learning_rate": 1e-06, "loss": -0.034, "step": 252 }, { "clip_ratio/high_max": 0.002454886576742865, "clip_ratio/high_mean": 0.0011808079943875782, "clip_ratio/low_mean": 0.0007260778120325995, "clip_ratio/low_min": 3.780636416195193e-05, "clip_ratio/region_mean": 0.001906885787320789, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3406.0, "completions/mean_length": 845.630615234375, "completions/mean_terminated_length": 633.0618286132812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.5902595508894721, "grad_norm": 0.19520410895347595, "learning_rate": 1e-06, "loss": -0.0342, "num_tokens": 38949823.0, "reward": 0.6316964626312256, "reward_std": 0.23886480927467346, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 253 }, { "clip_ratio/high_max": 0.0029217097981018014, "clip_ratio/high_mean": 0.0012666904185607564, "clip_ratio/low_mean": 0.0009204373054672033, "clip_ratio/low_min": 7.578930308227427e-05, "clip_ratio/region_mean": 0.002187127734941896, "epoch": 0.5925925925925926, "grad_norm": 0.19551533460617065, "learning_rate": 1e-06, "loss": -0.0344, "step": 254 }, { "clip_ratio/high_max": 0.003028957042261027, "clip_ratio/high_mean": 0.0013827830480295233, "clip_ratio/low_mean": 0.0011081993379775668, "clip_ratio/low_min": 5.027349016017979e-05, "clip_ratio/region_mean": 0.0024909823696361855, "epoch": 0.594925634295713, "grad_norm": 0.1766534298658371, "learning_rate": 1e-06, "loss": -0.0346, "step": 255 }, { "clip_ratio/high_max": 0.0029570692277047783, "clip_ratio/high_mean": 0.001343818676105002, "clip_ratio/low_mean": 0.0012197565392852994, "clip_ratio/low_min": 5.0371709221508354e-05, "clip_ratio/region_mean": 0.002563575231761206, "epoch": 0.5972586759988335, "grad_norm": 0.16498038172721863, "learning_rate": 1e-06, "loss": -0.0347, "step": 256 }, { "clip_ratio/high_max": 0.0020004543803224806, "clip_ratio/high_mean": 0.0007045400034257909, "clip_ratio/low_mean": 0.0006450886503444053, "clip_ratio/low_min": 3.832004949799739e-05, "clip_ratio/region_mean": 0.0013496286410372704, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 819.6406860351562, "completions/mean_terminated_length": 638.2638549804688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.599591717701954, "grad_norm": 0.174686998128891, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 39589389.0, "reward": 0.5703125, "reward_std": 0.18776056170463562, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 257 }, { "clip_ratio/high_max": 0.0021911398507654667, "clip_ratio/high_mean": 0.0008066736936598318, "clip_ratio/low_mean": 0.000846038323288667, "clip_ratio/low_min": 2.730450069066137e-05, "clip_ratio/region_mean": 0.0016527120096725412, "epoch": 0.6019247594050744, "grad_norm": 0.15377922356128693, "learning_rate": 1e-06, "loss": -0.0274, "step": 258 }, { "clip_ratio/high_max": 0.0026595802191877738, "clip_ratio/high_mean": 0.0009453495586058125, "clip_ratio/low_mean": 0.0009764476362761343, "clip_ratio/low_min": 1.9160024748998694e-05, "clip_ratio/region_mean": 0.0019217972112528514, "epoch": 0.6042578011081948, "grad_norm": 0.14722417294979095, "learning_rate": 1e-06, "loss": -0.0276, "step": 259 }, { "clip_ratio/high_max": 0.002826516800269019, "clip_ratio/high_mean": 0.0009853889441728825, "clip_ratio/low_mean": 0.0011208343730686465, "clip_ratio/low_min": 2.603443499538116e-05, "clip_ratio/region_mean": 0.002106223335431423, "epoch": 0.6065908428113153, "grad_norm": 0.14746461808681488, "learning_rate": 1e-06, "loss": -0.0277, "step": 260 }, { "clip_ratio/high_max": 0.0017576406498847064, "clip_ratio/high_mean": 0.0005976478887532721, "clip_ratio/low_mean": 0.0004928772068524268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010905250819632784, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 769.7455444335938, "completions/mean_terminated_length": 573.1583862304688, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 0.6089238845144357, "grad_norm": 0.16936379671096802, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 40172817.0, "reward": 0.598214328289032, "reward_std": 0.1515413373708725, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053290486335754, "step": 261 }, { "clip_ratio/high_max": 0.002137923140253406, "clip_ratio/high_mean": 0.0007493480406992603, "clip_ratio/low_mean": 0.000567885364034737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013172333929105662, "epoch": 0.6112569262175561, "grad_norm": 0.15487205982208252, "learning_rate": 1e-06, "loss": -0.0274, "step": 262 }, { "clip_ratio/high_max": 0.002313453282113187, "clip_ratio/high_mean": 0.0008186651230062125, "clip_ratio/low_mean": 0.0007745958864688873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001593260996742174, "epoch": 0.6135899679206765, "grad_norm": 0.13710205256938934, "learning_rate": 1e-06, "loss": -0.0276, "step": 263 }, { "clip_ratio/high_max": 0.002353799540287582, "clip_ratio/high_mean": 0.0008824724536680151, "clip_ratio/low_mean": 0.0008264293965112302, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017089018401748035, "epoch": 0.615923009623797, "grad_norm": 0.13875557482242584, "learning_rate": 1e-06, "loss": -0.0276, "step": 264 }, { "clip_ratio/high_max": 0.0013810562340950128, "clip_ratio/high_mean": 0.0005493980743267457, "clip_ratio/low_mean": 0.0005788339622085914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011282320301688742, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3763.0, "completions/mean_length": 908.1875610351562, "completions/mean_terminated_length": 695.6666870117188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.6182560513269175, "grad_norm": 0.14903157949447632, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 40852849.0, "reward": 0.535714328289032, "reward_std": 0.17731469869613647, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 265 }, { "clip_ratio/high_max": 0.001720084110274911, "clip_ratio/high_mean": 0.0006810845243307995, "clip_ratio/low_mean": 0.000695541473760386, "clip_ratio/low_min": 6.149285763967782e-05, "clip_ratio/region_mean": 0.0013766260017291643, "epoch": 0.620589093030038, "grad_norm": 0.14406567811965942, "learning_rate": 1e-06, "loss": -0.0154, "step": 266 }, { "clip_ratio/high_max": 0.0020672293176176026, "clip_ratio/high_mean": 0.0007439389773935545, "clip_ratio/low_mean": 0.0008149237601173809, "clip_ratio/low_min": 6.404508076229831e-05, "clip_ratio/region_mean": 0.00155886275388184, "epoch": 0.6229221347331584, "grad_norm": 0.14669474959373474, "learning_rate": 1e-06, "loss": -0.0155, "step": 267 }, { "clip_ratio/high_max": 0.0019061784987570718, "clip_ratio/high_mean": 0.0007410540856653824, "clip_ratio/low_mean": 0.0009376859288749984, "clip_ratio/low_min": 6.939301329111913e-05, "clip_ratio/region_mean": 0.001678740001807455, "epoch": 0.6252551764362788, "grad_norm": 0.13039635121822357, "learning_rate": 1e-06, "loss": -0.0156, "step": 268 }, { "clip_ratio/high_max": 0.0018028199483524077, "clip_ratio/high_mean": 0.0006874569044157397, "clip_ratio/low_mean": 0.0005106005064590136, "clip_ratio/low_min": 2.9585884476546198e-05, "clip_ratio/region_mean": 0.0011980574017798062, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 888.19091796875, "completions/mean_terminated_length": 620.5502319335938, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.6275882181393992, "grad_norm": 0.15618275105953217, "learning_rate": 1e-06, "loss": -0.0256, "num_tokens": 41449668.0, "reward": 0.5658482313156128, "reward_std": 0.1510535031557083, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 269 }, { "clip_ratio/high_max": 0.0019446795340627432, "clip_ratio/high_mean": 0.0007613918933202513, "clip_ratio/low_mean": 0.0007282111346285092, "clip_ratio/low_min": 3.0350084671226796e-05, "clip_ratio/region_mean": 0.001489602989749983, "epoch": 0.6299212598425197, "grad_norm": 0.14463922381401062, "learning_rate": 1e-06, "loss": -0.0258, "step": 270 }, { "clip_ratio/high_max": 0.0021781206960440613, "clip_ratio/high_mean": 0.0008448699973087059, "clip_ratio/low_mean": 0.0008981303326436318, "clip_ratio/low_min": 8.13642363937106e-05, "clip_ratio/region_mean": 0.0017430003572371788, "epoch": 0.6322543015456401, "grad_norm": 0.1415518820285797, "learning_rate": 1e-06, "loss": -0.026, "step": 271 }, { "clip_ratio/high_max": 0.0020730302603624295, "clip_ratio/high_mean": 0.0008484179506922374, "clip_ratio/low_mean": 0.000955949011768098, "clip_ratio/low_min": 0.00013005232176510617, "clip_ratio/region_mean": 0.0018043669479084201, "epoch": 0.6345873432487605, "grad_norm": 0.13381057977676392, "learning_rate": 1e-06, "loss": -0.026, "step": 272 }, { "clip_ratio/high_max": 0.002148325598682277, "clip_ratio/high_mean": 0.0009207470575347543, "clip_ratio/low_mean": 0.0006785067789678578, "clip_ratio/low_min": 1.9409937522141263e-05, "clip_ratio/region_mean": 0.0015992538355931174, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3847.0, "completions/mean_length": 948.7210083007812, "completions/mean_terminated_length": 661.2107543945312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.636920384951881, "grad_norm": 0.20500075817108154, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 42082890.0, "reward": 0.5368303656578064, "reward_std": 0.2144351601600647, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 273 }, { "clip_ratio/high_max": 0.0024067259291769005, "clip_ratio/high_mean": 0.0010504097954253666, "clip_ratio/low_mean": 0.0008104904227366205, "clip_ratio/low_min": 1.625487675482873e-05, "clip_ratio/region_mean": 0.0018609002800076269, "epoch": 0.6392534266550015, "grad_norm": 0.18786416947841644, "learning_rate": 1e-06, "loss": -0.0058, "step": 274 }, { "clip_ratio/high_max": 0.0024999762536026537, "clip_ratio/high_mean": 0.0010430742440803442, "clip_ratio/low_mean": 0.0010387395932411891, "clip_ratio/low_min": 8.285888179671019e-05, "clip_ratio/region_mean": 0.0020818138364120387, "epoch": 0.6415864683581219, "grad_norm": 0.17506757378578186, "learning_rate": 1e-06, "loss": -0.006, "step": 275 }, { "clip_ratio/high_max": 0.0027001566559192725, "clip_ratio/high_mean": 0.001160879499366274, "clip_ratio/low_mean": 0.0014031115624675294, "clip_ratio/low_min": 0.0001541474302939605, "clip_ratio/region_mean": 0.0025639910527388565, "epoch": 0.6439195100612424, "grad_norm": 0.19004221260547638, "learning_rate": 1e-06, "loss": -0.0061, "step": 276 }, { "clip_ratio/high_max": 0.001982006178877782, "clip_ratio/high_mean": 0.0008011192785488674, "clip_ratio/low_mean": 0.0008978966488939477, "clip_ratio/low_min": 0.0001257303429156309, "clip_ratio/region_mean": 0.0016990159565466456, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 922.2232666015625, "completions/mean_terminated_length": 669.8505859375, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 0.6462525517643628, "grad_norm": 0.2027897983789444, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 42728906.0, "reward": 0.5569196939468384, "reward_std": 0.22837404906749725, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 277 }, { "clip_ratio/high_max": 0.002469796600053087, "clip_ratio/high_mean": 0.0010389416602265555, "clip_ratio/low_mean": 0.001157696471636882, "clip_ratio/low_min": 0.0002515920023142826, "clip_ratio/region_mean": 0.00219663812458748, "epoch": 0.6485855934674832, "grad_norm": 0.18985936045646667, "learning_rate": 1e-06, "loss": -0.0335, "step": 278 }, { "clip_ratio/high_max": 0.002642264138557948, "clip_ratio/high_mean": 0.0011488028867461253, "clip_ratio/low_mean": 0.0012836683745263144, "clip_ratio/low_min": 0.00027828193560708314, "clip_ratio/region_mean": 0.0024324712867382914, "epoch": 0.6509186351706037, "grad_norm": 0.18310676515102386, "learning_rate": 1e-06, "loss": -0.0337, "step": 279 }, { "clip_ratio/high_max": 0.0028919992182636634, "clip_ratio/high_mean": 0.0011562096296984237, "clip_ratio/low_mean": 0.0015426580976054538, "clip_ratio/low_min": 0.0002890102396122529, "clip_ratio/region_mean": 0.002698867676372174, "epoch": 0.6532516768737241, "grad_norm": 0.16260650753974915, "learning_rate": 1e-06, "loss": -0.0338, "step": 280 }, { "clip_ratio/high_max": 0.00219465876580216, "clip_ratio/high_mean": 0.0007919146764834295, "clip_ratio/low_mean": 0.0006715007675666129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014634154649684206, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3350.0, "completions/mean_length": 842.8627319335938, "completions/mean_terminated_length": 605.2083740234375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.6555847185768445, "grad_norm": 0.19941651821136475, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 43333839.0, "reward": 0.5569196939468384, "reward_std": 0.17528702318668365, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 281 }, { "clip_ratio/high_max": 0.0021507229139388073, "clip_ratio/high_mean": 0.0008451492731182952, "clip_ratio/low_mean": 0.0008469266977044754, "clip_ratio/low_min": 1.4895138519932516e-05, "clip_ratio/region_mean": 0.001692075948085403, "epoch": 0.657917760279965, "grad_norm": 0.16673003137111664, "learning_rate": 1e-06, "loss": -0.0229, "step": 282 }, { "clip_ratio/high_max": 0.0024777984945103526, "clip_ratio/high_mean": 0.0009478648280492052, "clip_ratio/low_mean": 0.001053386182320537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002001250999455806, "epoch": 0.6602508019830855, "grad_norm": 0.15395739674568176, "learning_rate": 1e-06, "loss": -0.0232, "step": 283 }, { "clip_ratio/high_max": 0.00210989813786, "clip_ratio/high_mean": 0.0008852029659465188, "clip_ratio/low_mean": 0.001249679880857002, "clip_ratio/low_min": 1.4384349924512208e-05, "clip_ratio/region_mean": 0.0021348828231566586, "epoch": 0.6625838436862059, "grad_norm": 0.14514604210853577, "learning_rate": 1e-06, "loss": -0.0232, "step": 284 }, { "clip_ratio/high_max": 0.001999265434278641, "clip_ratio/high_mean": 0.000810526340501383, "clip_ratio/low_mean": 0.0009376847829116741, "clip_ratio/low_min": 6.594186834263382e-05, "clip_ratio/region_mean": 0.0017482111034041736, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2510.0, "completions/mean_length": 875.3549194335938, "completions/mean_terminated_length": 635.9304809570312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.6649168853893264, "grad_norm": 0.22797183692455292, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 43961645.0, "reward": 0.5212053656578064, "reward_std": 0.2005843222141266, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982911348342896, "step": 285 }, { "clip_ratio/high_max": 0.002601971646072343, "clip_ratio/high_mean": 0.000978758151177317, "clip_ratio/low_mean": 0.0011493046149553265, "clip_ratio/low_min": 0.00012364785106910858, "clip_ratio/region_mean": 0.002128062733390834, "epoch": 0.6672499270924468, "grad_norm": 0.19644752144813538, "learning_rate": 1e-06, "loss": -0.0149, "step": 286 }, { "clip_ratio/high_max": 0.0029370293123065494, "clip_ratio/high_mean": 0.001147280399891315, "clip_ratio/low_mean": 0.0013407869519141968, "clip_ratio/low_min": 0.00013045562263869215, "clip_ratio/region_mean": 0.0024880673008738086, "epoch": 0.6695829687955672, "grad_norm": 0.17700296640396118, "learning_rate": 1e-06, "loss": -0.0151, "step": 287 }, { "clip_ratio/high_max": 0.0031012436520541087, "clip_ratio/high_mean": 0.0011367060978955124, "clip_ratio/low_mean": 0.0016410894750151783, "clip_ratio/low_min": 0.00017486737488070503, "clip_ratio/region_mean": 0.002777795525616966, "epoch": 0.6719160104986877, "grad_norm": 0.17286638915538788, "learning_rate": 1e-06, "loss": -0.0152, "step": 288 }, { "clip_ratio/high_max": 0.001724022728012642, "clip_ratio/high_mean": 0.0006826344042565324, "clip_ratio/low_mean": 0.0005667309815180488, "clip_ratio/low_min": 2.887398932216456e-05, "clip_ratio/region_mean": 0.0012493653921410441, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3297.0, "completions/mean_length": 920.333740234375, "completions/mean_terminated_length": 655.3748779296875, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.6742490522018081, "grad_norm": 0.17062070965766907, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 44600416.0, "reward": 0.5580357313156128, "reward_std": 0.16750861704349518, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 289 }, { "clip_ratio/high_max": 0.0020607160186045803, "clip_ratio/high_mean": 0.0008350994885404361, "clip_ratio/low_mean": 0.0006770236104784999, "clip_ratio/low_min": 1.2852148756792303e-05, "clip_ratio/region_mean": 0.0015121231153898407, "epoch": 0.6765820939049285, "grad_norm": 0.154007688164711, "learning_rate": 1e-06, "loss": -0.0543, "step": 290 }, { "clip_ratio/high_max": 0.002040704872342758, "clip_ratio/high_mean": 0.0009163299873762298, "clip_ratio/low_mean": 0.0008831634622765705, "clip_ratio/low_min": 3.047500467801001e-05, "clip_ratio/region_mean": 0.0017994934823946096, "epoch": 0.678915135608049, "grad_norm": 0.1436816304922104, "learning_rate": 1e-06, "loss": -0.0544, "step": 291 }, { "clip_ratio/high_max": 0.002354840806219727, "clip_ratio/high_mean": 0.0008853112758515636, "clip_ratio/low_mean": 0.0009733925799082499, "clip_ratio/low_min": 2.5704297513584606e-05, "clip_ratio/region_mean": 0.0018587038503028452, "epoch": 0.6812481773111695, "grad_norm": 0.13852904736995697, "learning_rate": 1e-06, "loss": -0.0545, "step": 292 }, { "clip_ratio/high_max": 0.0021498534333659336, "clip_ratio/high_mean": 0.0008024014623515541, "clip_ratio/low_mean": 0.0006454346130340127, "clip_ratio/low_min": 2.5375558834639378e-05, "clip_ratio/region_mean": 0.001447836053557694, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3623.0, "completions/mean_length": 971.1328735351562, "completions/mean_terminated_length": 652.1119384765625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.6835812190142899, "grad_norm": 0.20537430047988892, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 45220287.0, "reward": 0.5691964626312256, "reward_std": 0.18303194642066956, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 293 }, { "clip_ratio/high_max": 0.0021460836433107033, "clip_ratio/high_mean": 0.0008764559806877514, "clip_ratio/low_mean": 0.0008536525929230265, "clip_ratio/low_min": 8.626158432889497e-05, "clip_ratio/region_mean": 0.0017301085536018945, "epoch": 0.6859142607174104, "grad_norm": 0.18146011233329773, "learning_rate": 1e-06, "loss": -0.0386, "step": 294 }, { "clip_ratio/high_max": 0.002821837035298813, "clip_ratio/high_mean": 0.0010902132889896166, "clip_ratio/low_mean": 0.0010684805311029777, "clip_ratio/low_min": 5.1454062486300245e-05, "clip_ratio/region_mean": 0.0021586938310065307, "epoch": 0.6882473024205308, "grad_norm": 0.17304526269435883, "learning_rate": 1e-06, "loss": -0.0388, "step": 295 }, { "clip_ratio/high_max": 0.0025890369288390502, "clip_ratio/high_mean": 0.0010107452362717595, "clip_ratio/low_mean": 0.0011576586766750552, "clip_ratio/low_min": 7.048963198030833e-05, "clip_ratio/region_mean": 0.0021684039020328782, "epoch": 0.6905803441236512, "grad_norm": 0.15993866324424744, "learning_rate": 1e-06, "loss": -0.0389, "step": 296 }, { "clip_ratio/high_max": 0.002087494191073347, "clip_ratio/high_mean": 0.0007458662621502299, "clip_ratio/low_mean": 0.0006469466452472261, "clip_ratio/low_min": 2.270076856802916e-05, "clip_ratio/region_mean": 0.0013928129119449295, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 866.2857666015625, "completions/mean_terminated_length": 646.8652954101562, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.6929133858267716, "grad_norm": 0.19377551972866058, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 45848071.0, "reward": 0.5390625, "reward_std": 0.16830208897590637, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 297 }, { "clip_ratio/high_max": 0.002341042520129122, "clip_ratio/high_mean": 0.0008734871789783938, "clip_ratio/low_mean": 0.0007677624307689257, "clip_ratio/low_min": 3.7830393011972774e-05, "clip_ratio/region_mean": 0.0016412495970143937, "epoch": 0.6952464275298921, "grad_norm": 0.18440525233745575, "learning_rate": 1e-06, "loss": -0.0056, "step": 298 }, { "clip_ratio/high_max": 0.0025020054017659277, "clip_ratio/high_mean": 0.000964397990173893, "clip_ratio/low_mean": 0.0009638205920055043, "clip_ratio/low_min": 1.9797276763711125e-05, "clip_ratio/region_mean": 0.0019282185603515245, "epoch": 0.6975794692330125, "grad_norm": 0.1549079865217209, "learning_rate": 1e-06, "loss": -0.0058, "step": 299 }, { "clip_ratio/high_max": 0.002495607521268539, "clip_ratio/high_mean": 0.000916457383937086, "clip_ratio/low_mean": 0.0011305743719276506, "clip_ratio/low_min": 3.956953059969237e-05, "clip_ratio/region_mean": 0.002047031703114044, "epoch": 0.6999125109361329, "grad_norm": 0.15340441465377808, "learning_rate": 1e-06, "loss": -0.0059, "step": 300 }, { "clip_ratio/high_max": 0.002479487993696239, "clip_ratio/high_mean": 0.0010109548456966877, "clip_ratio/low_mean": 0.0007489804047509097, "clip_ratio/low_min": 8.222711767302826e-05, "clip_ratio/region_mean": 0.0017599352431716397, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2059.0, "completions/mean_length": 1006.3047485351562, "completions/mean_terminated_length": 661.301513671875, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.7022455526392535, "grad_norm": 0.21473151445388794, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 46474320.0, "reward": 0.543526828289032, "reward_std": 0.2144775092601776, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 301 }, { "clip_ratio/high_max": 0.0028480310429586098, "clip_ratio/high_mean": 0.0011781237517425325, "clip_ratio/low_mean": 0.0008835742810333613, "clip_ratio/low_min": 9.115397187997587e-05, "clip_ratio/region_mean": 0.002061698047327809, "epoch": 0.7045785943423739, "grad_norm": 0.19146059453487396, "learning_rate": 1e-06, "loss": -0.048, "step": 302 }, { "clip_ratio/high_max": 0.0030311847804114223, "clip_ratio/high_mean": 0.0012647803814616054, "clip_ratio/low_mean": 0.0011280409526079893, "clip_ratio/low_min": 6.366150228132028e-05, "clip_ratio/region_mean": 0.0023928213558974676, "epoch": 0.7069116360454943, "grad_norm": 0.18539467453956604, "learning_rate": 1e-06, "loss": -0.0482, "step": 303 }, { "clip_ratio/high_max": 0.0030102770251687616, "clip_ratio/high_mean": 0.001217226883454714, "clip_ratio/low_mean": 0.0013476571002684068, "clip_ratio/low_min": 0.00012812625027436297, "clip_ratio/region_mean": 0.002564883994637057, "epoch": 0.7092446777486148, "grad_norm": 0.18372808396816254, "learning_rate": 1e-06, "loss": -0.0483, "step": 304 }, { "clip_ratio/high_max": 0.0018803218481480144, "clip_ratio/high_mean": 0.00078264167495945, "clip_ratio/low_mean": 0.0006800834380555898, "clip_ratio/low_min": 1.233471448358614e-05, "clip_ratio/region_mean": 0.0014627251330239233, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2283.0, "completions/mean_length": 905.3359985351562, "completions/mean_terminated_length": 609.6158447265625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.7115777194517352, "grad_norm": 0.2239924669265747, "learning_rate": 1e-06, "loss": -0.0251, "num_tokens": 47065829.0, "reward": 0.5714285969734192, "reward_std": 0.1870116889476776, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 305 }, { "clip_ratio/high_max": 0.0026517381411395036, "clip_ratio/high_mean": 0.0011003393010469154, "clip_ratio/low_mean": 0.0010205757225776324, "clip_ratio/low_min": 7.395359716610983e-05, "clip_ratio/region_mean": 0.002120915065461304, "epoch": 0.7139107611548556, "grad_norm": 0.2101132869720459, "learning_rate": 1e-06, "loss": -0.0254, "step": 306 }, { "clip_ratio/high_max": 0.0028270179318496957, "clip_ratio/high_mean": 0.0011196137056685984, "clip_ratio/low_mean": 0.0012128089510952123, "clip_ratio/low_min": 2.466942896717228e-05, "clip_ratio/region_mean": 0.0023324227513512596, "epoch": 0.7162438028579761, "grad_norm": 0.18428486585617065, "learning_rate": 1e-06, "loss": -0.0256, "step": 307 }, { "clip_ratio/high_max": 0.002363015402806923, "clip_ratio/high_mean": 0.0010658901901479112, "clip_ratio/low_mean": 0.0014643462345702574, "clip_ratio/low_min": 0.00010415140059194528, "clip_ratio/region_mean": 0.002530236371967476, "epoch": 0.7185768445610965, "grad_norm": 0.17178674042224884, "learning_rate": 1e-06, "loss": -0.0257, "step": 308 }, { "clip_ratio/high_max": 0.002385223771852907, "clip_ratio/high_mean": 0.0009904428261506837, "clip_ratio/low_mean": 0.0005621537156912382, "clip_ratio/low_min": 2.756753019639291e-05, "clip_ratio/region_mean": 0.0015525966045970563, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2489.0, "completions/mean_length": 897.09716796875, "completions/mean_terminated_length": 600.6134033203125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 0.7209098862642169, "grad_norm": 0.19375817477703094, "learning_rate": 1e-06, "loss": -0.0374, "num_tokens": 47644244.0, "reward": 0.6149553656578064, "reward_std": 0.1734083741903305, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 309 }, { "clip_ratio/high_max": 0.0027656317324726842, "clip_ratio/high_mean": 0.0011467549702501856, "clip_ratio/low_mean": 0.0006837193341198144, "clip_ratio/low_min": 1.0453252798470203e-05, "clip_ratio/region_mean": 0.0018304742843611166, "epoch": 0.7232429279673375, "grad_norm": 0.1754893809556961, "learning_rate": 1e-06, "loss": -0.0375, "step": 310 }, { "clip_ratio/high_max": 0.002889984578359872, "clip_ratio/high_mean": 0.0012613694634637795, "clip_ratio/low_mean": 0.0009629708802094683, "clip_ratio/low_min": 3.538256623869529e-05, "clip_ratio/region_mean": 0.002224340358225163, "epoch": 0.7255759696704579, "grad_norm": 0.16185636818408966, "learning_rate": 1e-06, "loss": -0.0377, "step": 311 }, { "clip_ratio/high_max": 0.002871236007194966, "clip_ratio/high_mean": 0.0012253369859536178, "clip_ratio/low_mean": 0.001014525489154039, "clip_ratio/low_min": 1.799078927433584e-05, "clip_ratio/region_mean": 0.0022398624787456356, "epoch": 0.7279090113735783, "grad_norm": 0.17588123679161072, "learning_rate": 1e-06, "loss": -0.0378, "step": 312 }, { "clip_ratio/high_max": 0.002086147724185139, "clip_ratio/high_mean": 0.0008469103850075044, "clip_ratio/low_mean": 0.0008083435477601597, "clip_ratio/low_min": 1.1007397006324027e-05, "clip_ratio/region_mean": 0.0016552538945688866, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 885.739990234375, "completions/mean_terminated_length": 613.6840209960938, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 0.7302420530766988, "grad_norm": 0.23492024838924408, "learning_rate": 1e-06, "loss": -0.0367, "num_tokens": 48240235.0, "reward": 0.5870535969734192, "reward_std": 0.20587676763534546, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 313 }, { "clip_ratio/high_max": 0.002379067002038937, "clip_ratio/high_mean": 0.0010538723690842744, "clip_ratio/low_mean": 0.0010255752076773206, "clip_ratio/low_min": 5.036196489527356e-05, "clip_ratio/region_mean": 0.0020794475931324996, "epoch": 0.7325750947798192, "grad_norm": 0.20596159994602203, "learning_rate": 1e-06, "loss": -0.0369, "step": 314 }, { "clip_ratio/high_max": 0.002566054739872925, "clip_ratio/high_mean": 0.0010753861861303449, "clip_ratio/low_mean": 0.0012584659052663483, "clip_ratio/low_min": 1.940692527568899e-05, "clip_ratio/region_mean": 0.0023338521641562693, "epoch": 0.7349081364829396, "grad_norm": 0.18503864109516144, "learning_rate": 1e-06, "loss": -0.0371, "step": 315 }, { "clip_ratio/high_max": 0.0028267628003959544, "clip_ratio/high_mean": 0.0011631867128016893, "clip_ratio/low_mean": 0.001585173016792396, "clip_ratio/low_min": 8.646383503219113e-05, "clip_ratio/region_mean": 0.0027483597295940854, "epoch": 0.73724117818606, "grad_norm": 0.17676009237766266, "learning_rate": 1e-06, "loss": -0.0373, "step": 316 }, { "clip_ratio/high_max": 0.0020308344828663394, "clip_ratio/high_mean": 0.0008841200924507575, "clip_ratio/low_mean": 0.0006222654037628672, "clip_ratio/low_min": 3.280446890130406e-05, "clip_ratio/region_mean": 0.001506385437096469, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 837.7857666015625, "completions/mean_terminated_length": 582.931396484375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.7395742198891805, "grad_norm": 0.2314973920583725, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 48814699.0, "reward": 0.613839328289032, "reward_std": 0.19084171950817108, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 317 }, { "clip_ratio/high_max": 0.0025161904195556417, "clip_ratio/high_mean": 0.0010388361515651923, "clip_ratio/low_mean": 0.0009122929877776187, "clip_ratio/low_min": 7.236033252411289e-05, "clip_ratio/region_mean": 0.0019511291611706838, "epoch": 0.7419072615923009, "grad_norm": 0.20519977807998657, "learning_rate": 1e-06, "loss": 0.0014, "step": 318 }, { "clip_ratio/high_max": 0.0024081753472273704, "clip_ratio/high_mean": 0.0009931490312737878, "clip_ratio/low_mean": 0.001269341999432072, "clip_ratio/low_min": 8.174411141226301e-05, "clip_ratio/region_mean": 0.0022624911143793724, "epoch": 0.7442403032954215, "grad_norm": 0.1907121241092682, "learning_rate": 1e-06, "loss": 0.0012, "step": 319 }, { "clip_ratio/high_max": 0.0027625623624771833, "clip_ratio/high_mean": 0.001064769570803037, "clip_ratio/low_mean": 0.0013623945651488611, "clip_ratio/low_min": 7.313470996450633e-05, "clip_ratio/region_mean": 0.002427164145046845, "epoch": 0.7465733449985419, "grad_norm": 0.18573161959648132, "learning_rate": 1e-06, "loss": 0.0012, "step": 320 }, { "clip_ratio/high_max": 0.0020261932440917008, "clip_ratio/high_mean": 0.0008740671855775872, "clip_ratio/low_mean": 0.0007327803323278204, "clip_ratio/low_min": 2.4659695554873906e-05, "clip_ratio/region_mean": 0.001606847537914291, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2926.0, "completions/mean_length": 984.7779541015625, "completions/mean_terminated_length": 654.4506225585938, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.7489063867016623, "grad_norm": 0.22532038390636444, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 49440620.0, "reward": 0.5613839626312256, "reward_std": 0.18416057527065277, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 321 }, { "clip_ratio/high_max": 0.002825237992510665, "clip_ratio/high_mean": 0.0011012507529812865, "clip_ratio/low_mean": 0.0009591566486051306, "clip_ratio/low_min": 1.2329847777436953e-05, "clip_ratio/region_mean": 0.002060407350654714, "epoch": 0.7512394284047827, "grad_norm": 0.2141539305448532, "learning_rate": 1e-06, "loss": -0.0202, "step": 322 }, { "clip_ratio/high_max": 0.0029633525336976163, "clip_ratio/high_mean": 0.0011840704191854456, "clip_ratio/low_mean": 0.001118860669521382, "clip_ratio/low_min": 4.405286381370388e-05, "clip_ratio/region_mean": 0.002302931126905605, "epoch": 0.7535724701079032, "grad_norm": 0.17943425476551056, "learning_rate": 1e-06, "loss": -0.0204, "step": 323 }, { "clip_ratio/high_max": 0.0028357562187011354, "clip_ratio/high_mean": 0.001128319876443129, "clip_ratio/low_mean": 0.001372187805827707, "clip_ratio/low_min": 7.397909212158993e-05, "clip_ratio/region_mean": 0.0025005076968227513, "epoch": 0.7559055118110236, "grad_norm": 0.16282466053962708, "learning_rate": 1e-06, "loss": -0.0205, "step": 324 }, { "clip_ratio/high_max": 0.0017659160221228376, "clip_ratio/high_mean": 0.0007654340861336095, "clip_ratio/low_mean": 0.0006649163590282114, "clip_ratio/low_min": 1.1312216884107329e-05, "clip_ratio/region_mean": 0.001430350461305352, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3992.0, "completions/mean_length": 940.239990234375, "completions/mean_terminated_length": 643.5446166992188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.758238553514144, "grad_norm": 0.21520856022834778, "learning_rate": 1e-06, "loss": -0.035, "num_tokens": 50061851.0, "reward": 0.5424107313156128, "reward_std": 0.18437741696834564, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 325 }, { "clip_ratio/high_max": 0.002330236129637342, "clip_ratio/high_mean": 0.0009415907279617386, "clip_ratio/low_mean": 0.000835203873066348, "clip_ratio/low_min": 3.393665247131139e-05, "clip_ratio/region_mean": 0.001776794575562235, "epoch": 0.7605715952172645, "grad_norm": 0.1842113435268402, "learning_rate": 1e-06, "loss": -0.0353, "step": 326 }, { "clip_ratio/high_max": 0.00230511437257519, "clip_ratio/high_mean": 0.001003508476060233, "clip_ratio/low_mean": 0.0010929000836767955, "clip_ratio/low_min": 1.1312216884107329e-05, "clip_ratio/region_mean": 0.0020964085560990497, "epoch": 0.7629046369203849, "grad_norm": 0.16316959261894226, "learning_rate": 1e-06, "loss": -0.0354, "step": 327 }, { "clip_ratio/high_max": 0.002567019429989159, "clip_ratio/high_mean": 0.000990610795270186, "clip_ratio/low_mean": 0.0012704540695267497, "clip_ratio/low_min": 9.153485734714195e-06, "clip_ratio/region_mean": 0.0022610649466514587, "epoch": 0.7652376786235054, "grad_norm": 0.204722598195076, "learning_rate": 1e-06, "loss": -0.0355, "step": 328 }, { "clip_ratio/high_max": 0.0017108206229750067, "clip_ratio/high_mean": 0.0006872556414236897, "clip_ratio/low_mean": 0.0005310801495852502, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012183357739559142, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2780.0, "completions/mean_length": 852.4699096679688, "completions/mean_terminated_length": 611.3441162109375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.7675707203266259, "grad_norm": 0.21647314727306366, "learning_rate": 1e-06, "loss": -0.026, "num_tokens": 50656928.0, "reward": 0.6395089626312256, "reward_std": 0.15737581253051758, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 329 }, { "clip_ratio/high_max": 0.0020555987684929278, "clip_ratio/high_mean": 0.0007949248674776754, "clip_ratio/low_mean": 0.000770878323010038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015658032170904335, "epoch": 0.7699037620297463, "grad_norm": 0.1775524616241455, "learning_rate": 1e-06, "loss": -0.0262, "step": 330 }, { "clip_ratio/high_max": 0.0025171050292556174, "clip_ratio/high_mean": 0.0009998566874855896, "clip_ratio/low_mean": 0.0008616442519269185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018615009503264446, "epoch": 0.7722368037328667, "grad_norm": 0.16343437135219574, "learning_rate": 1e-06, "loss": -0.0264, "step": 331 }, { "clip_ratio/high_max": 0.002173786866478622, "clip_ratio/high_mean": 0.0009114055246755015, "clip_ratio/low_mean": 0.0010367860127189488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019481915842334274, "epoch": 0.7745698454359872, "grad_norm": 0.15880723297595978, "learning_rate": 1e-06, "loss": -0.0264, "step": 332 }, { "clip_ratio/high_max": 0.0017265135975321755, "clip_ratio/high_mean": 0.0005917233247600961, "clip_ratio/low_mean": 0.0006106325026848936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001202355808345601, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 1066.146240234375, "completions/mean_terminated_length": 642.120849609375, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.7769028871391076, "grad_norm": 0.21014513075351715, "learning_rate": 1e-06, "loss": -0.0184, "num_tokens": 51257251.0, "reward": 0.504464328289032, "reward_std": 0.1623242348432541, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 333 }, { "clip_ratio/high_max": 0.0021170642612560187, "clip_ratio/high_mean": 0.0007432475922541926, "clip_ratio/low_mean": 0.0008848217221384402, "clip_ratio/low_min": 1.387963584420504e-05, "clip_ratio/region_mean": 0.0016280692943837494, "epoch": 0.779235928842228, "grad_norm": 0.18322604894638062, "learning_rate": 1e-06, "loss": -0.0186, "step": 334 }, { "clip_ratio/high_max": 0.0023809336016711313, "clip_ratio/high_mean": 0.0008279726753244177, "clip_ratio/low_mean": 0.0009638046303734882, "clip_ratio/low_min": 1.4501159967039712e-05, "clip_ratio/region_mean": 0.00179177729296498, "epoch": 0.7815689705453485, "grad_norm": 0.1649850606918335, "learning_rate": 1e-06, "loss": -0.0188, "step": 335 }, { "clip_ratio/high_max": 0.0023388637600874063, "clip_ratio/high_mean": 0.0008620175885880599, "clip_ratio/low_mean": 0.0011599286262935493, "clip_ratio/low_min": 5.6761591622489505e-05, "clip_ratio/region_mean": 0.0020219462167005986, "epoch": 0.7839020122484689, "grad_norm": 0.1583496779203415, "learning_rate": 1e-06, "loss": -0.0188, "step": 336 }, { "clip_ratio/high_max": 0.0019701817764143925, "clip_ratio/high_mean": 0.0008163482880263473, "clip_ratio/low_mean": 0.0004907682782686607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013071165485598613, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 972.9855346679688, "completions/mean_terminated_length": 619.9490356445312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.7862350539515894, "grad_norm": 0.2171715795993805, "learning_rate": 1e-06, "loss": -0.0345, "num_tokens": 51846038.0, "reward": 0.5714285969734192, "reward_std": 0.15593409538269043, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 337 }, { "clip_ratio/high_max": 0.002572505996795371, "clip_ratio/high_mean": 0.0009753500671649817, "clip_ratio/low_mean": 0.000732240172510501, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017075901851058006, "epoch": 0.7885680956547099, "grad_norm": 0.20398429036140442, "learning_rate": 1e-06, "loss": -0.0347, "step": 338 }, { "clip_ratio/high_max": 0.0028881516482215375, "clip_ratio/high_mean": 0.001134720796471811, "clip_ratio/low_mean": 0.0007902334382379195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019249542820034549, "epoch": 0.7909011373578303, "grad_norm": 0.16836610436439514, "learning_rate": 1e-06, "loss": -0.0349, "step": 339 }, { "clip_ratio/high_max": 0.002433148052659817, "clip_ratio/high_mean": 0.0010081968639497063, "clip_ratio/low_mean": 0.001013724282529438, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002021921107370872, "epoch": 0.7932341790609507, "grad_norm": 0.15733319520950317, "learning_rate": 1e-06, "loss": -0.035, "step": 340 }, { "clip_ratio/high_max": 0.0019665661711769644, "clip_ratio/high_mean": 0.0006874319560665754, "clip_ratio/low_mean": 0.0005987316217215266, "clip_ratio/low_min": 5.924893321207492e-05, "clip_ratio/region_mean": 0.001286163580516586, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3553.0, "completions/mean_length": 863.029052734375, "completions/mean_terminated_length": 618.5186157226562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.7955672207640712, "grad_norm": 0.22709819674491882, "learning_rate": 1e-06, "loss": -0.0126, "num_tokens": 52448128.0, "reward": 0.5625, "reward_std": 0.15225742757320404, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 341 }, { "clip_ratio/high_max": 0.0019139994692523032, "clip_ratio/high_mean": 0.0007924420715426095, "clip_ratio/low_mean": 0.0008447009604424238, "clip_ratio/low_min": 0.00012417925518093398, "clip_ratio/region_mean": 0.0016371430319850333, "epoch": 0.7979002624671916, "grad_norm": 0.17402707040309906, "learning_rate": 1e-06, "loss": -0.0129, "step": 342 }, { "clip_ratio/high_max": 0.00228307512588799, "clip_ratio/high_mean": 0.0007988824791027582, "clip_ratio/low_mean": 0.0009502735165369813, "clip_ratio/low_min": 0.00010351143191655865, "clip_ratio/region_mean": 0.0017491560320195276, "epoch": 0.800233304170312, "grad_norm": 0.1693955808877945, "learning_rate": 1e-06, "loss": -0.013, "step": 343 }, { "clip_ratio/high_max": 0.0020274076487112325, "clip_ratio/high_mean": 0.0008058251578404452, "clip_ratio/low_mean": 0.0012059471646352904, "clip_ratio/low_min": 0.0001847325765993446, "clip_ratio/region_mean": 0.002011772339756135, "epoch": 0.8025663458734325, "grad_norm": 0.15824578702449799, "learning_rate": 1e-06, "loss": -0.0131, "step": 344 }, { "clip_ratio/high_max": 0.0024287665510200895, "clip_ratio/high_mean": 0.0010701709761633538, "clip_ratio/low_mean": 0.0005244867297733435, "clip_ratio/low_min": 4.60348637716379e-05, "clip_ratio/region_mean": 0.0015946576968417503, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2066.0, "completions/mean_length": 801.6574096679688, "completions/mean_terminated_length": 569.4396362304688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.8048993875765529, "grad_norm": 0.25556159019470215, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 53005413.0, "reward": 0.6383928656578064, "reward_std": 0.18126347661018372, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 345 }, { "clip_ratio/high_max": 0.0026033492176793516, "clip_ratio/high_mean": 0.0011321816746203694, "clip_ratio/low_mean": 0.0007589621236547828, "clip_ratio/low_min": 1.8463810192770325e-05, "clip_ratio/region_mean": 0.0018911437728093006, "epoch": 0.8072324292796734, "grad_norm": 0.2019054889678955, "learning_rate": 1e-06, "loss": -0.0214, "step": 346 }, { "clip_ratio/high_max": 0.0024528612120775506, "clip_ratio/high_mean": 0.0010744591600087006, "clip_ratio/low_mean": 0.0009249952199752443, "clip_ratio/low_min": 5.539143239730038e-05, "clip_ratio/region_mean": 0.001999454398173839, "epoch": 0.8095654709827939, "grad_norm": 0.18154841661453247, "learning_rate": 1e-06, "loss": -0.0215, "step": 347 }, { "clip_ratio/high_max": 0.002686970961804036, "clip_ratio/high_mean": 0.0011972458305535838, "clip_ratio/low_mean": 0.0011541733802005183, "clip_ratio/low_min": 0.00010741467849584296, "clip_ratio/region_mean": 0.002351419192564208, "epoch": 0.8118985126859143, "grad_norm": 0.179201140999794, "learning_rate": 1e-06, "loss": -0.0217, "step": 348 }, { "clip_ratio/high_max": 0.002198454505560221, "clip_ratio/high_mean": 0.0009481786855758401, "clip_ratio/low_mean": 0.0006049338808225002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015531125500274356, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 877.8605346679688, "completions/mean_terminated_length": 588.149658203125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.8142315543890347, "grad_norm": 0.22538886964321136, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 53568944.0, "reward": 0.6573660969734192, "reward_std": 0.1759275496006012, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 349 }, { "clip_ratio/high_max": 0.0029960423926240765, "clip_ratio/high_mean": 0.001205331936944276, "clip_ratio/low_mean": 0.0009345814778498607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021399134275270626, "epoch": 0.8165645960921551, "grad_norm": 0.25057047605514526, "learning_rate": 1e-06, "loss": -0.0398, "step": 350 }, { "clip_ratio/high_max": 0.002933300071163103, "clip_ratio/high_mean": 0.0012073634934495203, "clip_ratio/low_mean": 0.00103660965396557, "clip_ratio/low_min": 2.9634898965014145e-05, "clip_ratio/region_mean": 0.0022439731619670056, "epoch": 0.8188976377952756, "grad_norm": 0.18622159957885742, "learning_rate": 1e-06, "loss": -0.04, "step": 351 }, { "clip_ratio/high_max": 0.0032266734924633056, "clip_ratio/high_mean": 0.0013009330614295322, "clip_ratio/low_mean": 0.0013344781218620483, "clip_ratio/low_min": 2.133105772372801e-05, "clip_ratio/region_mean": 0.002635411190567538, "epoch": 0.821230679498396, "grad_norm": 0.17074429988861084, "learning_rate": 1e-06, "loss": -0.0402, "step": 352 }, { "clip_ratio/high_max": 0.0017971512352232821, "clip_ratio/high_mean": 0.0007026633556961315, "clip_ratio/low_mean": 0.0006032489382050699, "clip_ratio/low_min": 1.3160665730538312e-05, "clip_ratio/region_mean": 0.0013059123120910954, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3867.0, "completions/mean_length": 921.6998291015625, "completions/mean_terminated_length": 610.493896484375, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.8235637212015164, "grad_norm": 0.2515932023525238, "learning_rate": 1e-06, "loss": -0.0245, "num_tokens": 54168955.0, "reward": 0.5424107313156128, "reward_std": 0.15988317131996155, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 353 }, { "clip_ratio/high_max": 0.0021717835843446665, "clip_ratio/high_mean": 0.0007233772030303953, "clip_ratio/low_mean": 0.0008419789210165618, "clip_ratio/low_min": 1.4545031262969133e-05, "clip_ratio/region_mean": 0.0015653561240469571, "epoch": 0.8258967629046369, "grad_norm": 0.1717970371246338, "learning_rate": 1e-06, "loss": -0.0247, "step": 354 }, { "clip_ratio/high_max": 0.0023595982711412944, "clip_ratio/high_mean": 0.000842728490169975, "clip_ratio/low_mean": 0.0009849208618106786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018276493137818761, "epoch": 0.8282298046077574, "grad_norm": 0.1771852672100067, "learning_rate": 1e-06, "loss": -0.0248, "step": 355 }, { "clip_ratio/high_max": 0.00218829129516962, "clip_ratio/high_mean": 0.0008186861086869612, "clip_ratio/low_mean": 0.0012674419085669797, "clip_ratio/low_min": 1.3160665730538312e-05, "clip_ratio/region_mean": 0.0020861279699602164, "epoch": 0.8305628463108778, "grad_norm": 0.15224948525428772, "learning_rate": 1e-06, "loss": -0.0249, "step": 356 }, { "clip_ratio/high_max": 0.0019288021903776098, "clip_ratio/high_mean": 0.0007495696963815135, "clip_ratio/low_mean": 0.00046653347021674563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012161031409050338, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3902.0, "completions/mean_length": 918.7522583007812, "completions/mean_terminated_length": 615.7872924804688, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 0.8328958880139983, "grad_norm": 0.21693141758441925, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 54760221.0, "reward": 0.5491071939468384, "reward_std": 0.1401536762714386, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 357 }, { "clip_ratio/high_max": 0.0026613782247295603, "clip_ratio/high_mean": 0.0009070894393516937, "clip_ratio/low_mean": 0.0006486262627731776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015557157057628501, "epoch": 0.8352289297171187, "grad_norm": 0.179405078291893, "learning_rate": 1e-06, "loss": -0.0146, "step": 358 }, { "clip_ratio/high_max": 0.0024907990446081385, "clip_ratio/high_mean": 0.0009263725514756516, "clip_ratio/low_mean": 0.0008601827576057985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017865552872535773, "epoch": 0.8375619714202391, "grad_norm": 0.15576620399951935, "learning_rate": 1e-06, "loss": -0.0148, "step": 359 }, { "clip_ratio/high_max": 0.002331049137865193, "clip_ratio/high_mean": 0.0008176261253538541, "clip_ratio/low_mean": 0.0009222162534570089, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017398423842678312, "epoch": 0.8398950131233596, "grad_norm": 0.18607597053050995, "learning_rate": 1e-06, "loss": -0.0148, "step": 360 }, { "clip_ratio/high_max": 0.0020165567293588538, "clip_ratio/high_mean": 0.000853951276440057, "clip_ratio/low_mean": 0.0006930507770448457, "clip_ratio/low_min": 2.6749410608317703e-05, "clip_ratio/region_mean": 0.0015470020662178285, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3452.0, "completions/mean_length": 865.521240234375, "completions/mean_terminated_length": 591.7518310546875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.84222805482648, "grad_norm": 0.24153633415699005, "learning_rate": 1e-06, "loss": -0.0373, "num_tokens": 55336968.0, "reward": 0.5915178656578064, "reward_std": 0.1732928454875946, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 361 }, { "clip_ratio/high_max": 0.002464430181134958, "clip_ratio/high_mean": 0.0010238850773021113, "clip_ratio/low_mean": 0.0009376328980579274, "clip_ratio/low_min": 2.2498199541587383e-05, "clip_ratio/region_mean": 0.0019615179335232824, "epoch": 0.8445610965296004, "grad_norm": 0.20365145802497864, "learning_rate": 1e-06, "loss": -0.0376, "step": 362 }, { "clip_ratio/high_max": 0.002556008934334386, "clip_ratio/high_mean": 0.0010945256326522212, "clip_ratio/low_mean": 0.001119626198487822, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002214151834778022, "epoch": 0.8468941382327209, "grad_norm": 0.18342220783233643, "learning_rate": 1e-06, "loss": -0.0377, "step": 363 }, { "clip_ratio/high_max": 0.0025188724284817, "clip_ratio/high_mean": 0.0009661511594458716, "clip_ratio/low_mean": 0.0013099270981911104, "clip_ratio/low_min": 2.2123893359093927e-05, "clip_ratio/region_mean": 0.002276078288559802, "epoch": 0.8492271799358414, "grad_norm": 0.183487206697464, "learning_rate": 1e-06, "loss": -0.0378, "step": 364 }, { "clip_ratio/high_max": 0.0023666196648264304, "clip_ratio/high_mean": 0.000941422698815586, "clip_ratio/low_mean": 0.0005456536673591472, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014870763588987757, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 881.25341796875, "completions/mean_terminated_length": 629.799072265625, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 0.8515602216389618, "grad_norm": 0.21291105449199677, "learning_rate": 1e-06, "loss": -0.0398, "num_tokens": 55947411.0, "reward": 0.6417410969734192, "reward_std": 0.16958087682724, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975653409957886, "step": 365 }, { "clip_ratio/high_max": 0.0023889943404356018, "clip_ratio/high_mean": 0.0010003773859352805, "clip_ratio/low_mean": 0.0007598972979394603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001760274768457748, "epoch": 0.8538932633420823, "grad_norm": 0.18001306056976318, "learning_rate": 1e-06, "loss": -0.0401, "step": 366 }, { "clip_ratio/high_max": 0.002810595411574468, "clip_ratio/high_mean": 0.0011152851329825353, "clip_ratio/low_mean": 0.0008203240631701192, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019356091434019618, "epoch": 0.8562263050452027, "grad_norm": 0.16618499159812927, "learning_rate": 1e-06, "loss": -0.0402, "step": 367 }, { "clip_ratio/high_max": 0.002615925644931849, "clip_ratio/high_mean": 0.0010511315558687784, "clip_ratio/low_mean": 0.0010743256916612154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021254572566249408, "epoch": 0.8585593467483231, "grad_norm": 0.1623554676771164, "learning_rate": 1e-06, "loss": -0.0403, "step": 368 }, { "clip_ratio/high_max": 0.002231936941825552, "clip_ratio/high_mean": 0.0008358096129086334, "clip_ratio/low_mean": 0.0006096504575907602, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014454600895987824, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 927.8984985351562, "completions/mean_terminated_length": 600.1637573242188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.8608923884514436, "grad_norm": 0.2518787682056427, "learning_rate": 1e-06, "loss": -0.0199, "num_tokens": 56532280.0, "reward": 0.5680803656578064, "reward_std": 0.18975545465946198, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 369 }, { "clip_ratio/high_max": 0.0022443662091973238, "clip_ratio/high_mean": 0.0009726381431391928, "clip_ratio/low_mean": 0.0009172410991595825, "clip_ratio/low_min": 2.8811271477025002e-05, "clip_ratio/region_mean": 0.0018898792513937224, "epoch": 0.863225430154564, "grad_norm": 0.21699672937393188, "learning_rate": 1e-06, "loss": -0.0203, "step": 370 }, { "clip_ratio/high_max": 0.002635184835526161, "clip_ratio/high_mean": 0.0010936449471046217, "clip_ratio/low_mean": 0.0010467112697369885, "clip_ratio/low_min": 3.5840941563947126e-05, "clip_ratio/region_mean": 0.00214035621684161, "epoch": 0.8655584718576844, "grad_norm": 0.18343502283096313, "learning_rate": 1e-06, "loss": -0.0204, "step": 371 }, { "clip_ratio/high_max": 0.002463686316332314, "clip_ratio/high_mean": 0.000991379471088294, "clip_ratio/low_mean": 0.0012978556478628889, "clip_ratio/low_min": 8.788790728431195e-05, "clip_ratio/region_mean": 0.0022892351262271404, "epoch": 0.8678915135608049, "grad_norm": 0.20154787600040436, "learning_rate": 1e-06, "loss": -0.0205, "step": 372 }, { "clip_ratio/high_max": 0.0015627559732820373, "clip_ratio/high_mean": 0.0007060485222609714, "clip_ratio/low_mean": 0.0005485578758452903, "clip_ratio/low_min": 9.521632819087245e-06, "clip_ratio/region_mean": 0.0012546063589979894, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 925.9285888671875, "completions/mean_terminated_length": 648.9320678710938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.8702245552639254, "grad_norm": 0.2302597016096115, "learning_rate": 1e-06, "loss": -0.0179, "num_tokens": 57143752.0, "reward": 0.5613839626312256, "reward_std": 0.17092610895633698, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 373 }, { "clip_ratio/high_max": 0.0020515899450401776, "clip_ratio/high_mean": 0.0007992923783604056, "clip_ratio/low_mean": 0.0007848022023608792, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015840945925447159, "epoch": 0.8725575969670458, "grad_norm": 0.17715898156166077, "learning_rate": 1e-06, "loss": -0.0181, "step": 374 }, { "clip_ratio/high_max": 0.0024287963824463077, "clip_ratio/high_mean": 0.0008912174562283326, "clip_ratio/low_mean": 0.0009563523071847158, "clip_ratio/low_min": 2.8564900276251137e-05, "clip_ratio/region_mean": 0.0018475697725079954, "epoch": 0.8748906386701663, "grad_norm": 0.245353102684021, "learning_rate": 1e-06, "loss": -0.0182, "step": 375 }, { "clip_ratio/high_max": 0.0021456541326188017, "clip_ratio/high_mean": 0.0008803438231552718, "clip_ratio/low_mean": 0.001216527227370534, "clip_ratio/low_min": 2.196836612711195e-05, "clip_ratio/region_mean": 0.002096871074172668, "epoch": 0.8772236803732867, "grad_norm": 0.15898433327674866, "learning_rate": 1e-06, "loss": -0.0184, "step": 376 }, { "clip_ratio/high_max": 0.002165385289117694, "clip_ratio/high_mean": 0.0008119576868921285, "clip_ratio/low_mean": 0.000668839120407938, "clip_ratio/low_min": 2.529340417822823e-05, "clip_ratio/region_mean": 0.0014807967891101725, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3139.0, "completions/mean_length": 963.4910888671875, "completions/mean_terminated_length": 647.9312133789062, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 0.8795567220764071, "grad_norm": 0.21807657182216644, "learning_rate": 1e-06, "loss": -0.0364, "num_tokens": 57758664.0, "reward": 0.5412946939468384, "reward_std": 0.19512638449668884, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 377 }, { "clip_ratio/high_max": 0.002510105274268426, "clip_ratio/high_mean": 0.001023588039970491, "clip_ratio/low_mean": 0.0008575574447604595, "clip_ratio/low_min": 3.056949663005071e-05, "clip_ratio/region_mean": 0.0018811454792739823, "epoch": 0.8818897637795275, "grad_norm": 0.24140609800815582, "learning_rate": 1e-06, "loss": -0.0367, "step": 378 }, { "clip_ratio/high_max": 0.00252076792094158, "clip_ratio/high_mean": 0.0010582318172964733, "clip_ratio/low_mean": 0.0010428122513985727, "clip_ratio/low_min": 3.6654876566899475e-05, "clip_ratio/region_mean": 0.0021010440614190884, "epoch": 0.884222805482648, "grad_norm": 0.1708415299654007, "learning_rate": 1e-06, "loss": -0.0368, "step": 379 }, { "clip_ratio/high_max": 0.0025287936223321594, "clip_ratio/high_mean": 0.0010501180331630167, "clip_ratio/low_mean": 0.0013154496082279366, "clip_ratio/low_min": 9.351067819807213e-05, "clip_ratio/region_mean": 0.002365567663218826, "epoch": 0.8865558471857684, "grad_norm": 0.17794418334960938, "learning_rate": 1e-06, "loss": -0.0369, "step": 380 }, { "clip_ratio/high_max": 0.002487028541509062, "clip_ratio/high_mean": 0.0010293005434505176, "clip_ratio/low_mean": 0.000717295255526551, "clip_ratio/low_min": 1.2608432371052913e-05, "clip_ratio/region_mean": 0.0017465958153479733, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3377.0, "completions/mean_length": 919.2344360351562, "completions/mean_terminated_length": 629.0304565429688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.8888888888888888, "grad_norm": 0.24101272225379944, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 58358554.0, "reward": 0.5625, "reward_std": 0.2137964516878128, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 381 }, { "clip_ratio/high_max": 0.002524464900488965, "clip_ratio/high_mean": 0.0012198094445921015, "clip_ratio/low_mean": 0.0009389289261889644, "clip_ratio/low_min": 1.2608432371052913e-05, "clip_ratio/region_mean": 0.002158738367143087, "epoch": 0.8912219305920094, "grad_norm": 0.20234255492687225, "learning_rate": 1e-06, "loss": -0.0447, "step": 382 }, { "clip_ratio/high_max": 0.0027121251841890626, "clip_ratio/high_mean": 0.0012514218615251593, "clip_ratio/low_mean": 0.0012125361117796274, "clip_ratio/low_min": 1.2014609637844842e-05, "clip_ratio/region_mean": 0.0024639579642098397, "epoch": 0.8935549722951298, "grad_norm": 0.18023191392421722, "learning_rate": 1e-06, "loss": -0.0449, "step": 383 }, { "clip_ratio/high_max": 0.0028525425732368603, "clip_ratio/high_mean": 0.0012203649239381775, "clip_ratio/low_mean": 0.0013325776926649269, "clip_ratio/low_min": 4.805843855137937e-05, "clip_ratio/region_mean": 0.0025529425765853375, "epoch": 0.8958880139982502, "grad_norm": 0.2880335748195648, "learning_rate": 1e-06, "loss": -0.0449, "step": 384 }, { "clip_ratio/high_max": 0.0023073976262821816, "clip_ratio/high_mean": 0.0009232878073817119, "clip_ratio/low_mean": 0.000623939338765922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015472271697944961, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3576.0, "completions/mean_length": 988.0011596679688, "completions/mean_terminated_length": 610.6846313476562, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 0.8982210557013707, "grad_norm": 0.22138522565364838, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 58937787.0, "reward": 0.546875, "reward_std": 0.17461372911930084, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 385 }, { "clip_ratio/high_max": 0.002377451295615174, "clip_ratio/high_mean": 0.000971550065514748, "clip_ratio/low_mean": 0.0008350397292815614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001806589811167214, "epoch": 0.9005540974044911, "grad_norm": 0.21801231801509857, "learning_rate": 1e-06, "loss": -0.0396, "step": 386 }, { "clip_ratio/high_max": 0.0024675863969605416, "clip_ratio/high_mean": 0.001060649034116068, "clip_ratio/low_mean": 0.0009746981941134436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020353472282295115, "epoch": 0.9028871391076115, "grad_norm": 0.18678408861160278, "learning_rate": 1e-06, "loss": -0.0397, "step": 387 }, { "clip_ratio/high_max": 0.002575601582066156, "clip_ratio/high_mean": 0.0010663571447366849, "clip_ratio/low_mean": 0.0012509522930486128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002317309415957425, "epoch": 0.905220180810732, "grad_norm": 0.19864848256111145, "learning_rate": 1e-06, "loss": -0.0399, "step": 388 }, { "clip_ratio/high_max": 0.0014403713648789562, "clip_ratio/high_mean": 0.0005397231088863919, "clip_ratio/low_mean": 0.0004968672092218185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010365903108322527, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3012.0, "completions/mean_length": 1146.665283203125, "completions/mean_terminated_length": 672.9378051757812, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 0.9075532225138524, "grad_norm": 0.2242993712425232, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 59543223.0, "reward": 0.4933035969734192, "reward_std": 0.14199379086494446, "rewards/verify_math_reward/mean": 0.4933035671710968, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 389 }, { "clip_ratio/high_max": 0.001978857944777701, "clip_ratio/high_mean": 0.0007474165613530204, "clip_ratio/low_mean": 0.0006779378245482803, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014253543886297848, "epoch": 0.9098862642169728, "grad_norm": 0.1980229914188385, "learning_rate": 1e-06, "loss": -0.033, "step": 390 }, { "clip_ratio/high_max": 0.0021036925827502273, "clip_ratio/high_mean": 0.0007958486821735278, "clip_ratio/low_mean": 0.0007948368183861021, "clip_ratio/low_min": 3.2445059332530946e-05, "clip_ratio/region_mean": 0.0015906855296634603, "epoch": 0.9122193059200934, "grad_norm": 0.18185946345329285, "learning_rate": 1e-06, "loss": -0.0331, "step": 391 }, { "clip_ratio/high_max": 0.0019272925237601157, "clip_ratio/high_mean": 0.0007416932021442335, "clip_ratio/low_mean": 0.0009933531619026326, "clip_ratio/low_min": 3.4594097087392583e-05, "clip_ratio/region_mean": 0.0017350463895127177, "epoch": 0.9145523476232138, "grad_norm": 0.16489240527153015, "learning_rate": 1e-06, "loss": -0.0331, "step": 392 }, { "clip_ratio/high_max": 0.0018829276377800852, "clip_ratio/high_mean": 0.0008267601297120564, "clip_ratio/low_mean": 0.0007134396828405443, "clip_ratio/low_min": 3.410331191844307e-05, "clip_ratio/region_mean": 0.0015401998243760318, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 1032.266845703125, "completions/mean_terminated_length": 625.5765380859375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 0.9168853893263342, "grad_norm": 0.22167567908763885, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 60137270.0, "reward": 0.5111607313156128, "reward_std": 0.17851904034614563, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 393 }, { "clip_ratio/high_max": 0.002248465272714384, "clip_ratio/high_mean": 0.0009830765520746354, "clip_ratio/low_mean": 0.000986932851446909, "clip_ratio/low_min": 4.5183445763541386e-05, "clip_ratio/region_mean": 0.0019700093725987244, "epoch": 0.9192184310294547, "grad_norm": 0.21001669764518738, "learning_rate": 1e-06, "loss": -0.0318, "step": 394 }, { "clip_ratio/high_max": 0.0024827140441630036, "clip_ratio/high_mean": 0.0010166514584852848, "clip_ratio/low_mean": 0.00117380219126062, "clip_ratio/low_min": 5.6479304475942627e-05, "clip_ratio/region_mean": 0.0021904536624788307, "epoch": 0.9215514727325751, "grad_norm": 0.20626439154148102, "learning_rate": 1e-06, "loss": -0.0319, "step": 395 }, { "clip_ratio/high_max": 0.002260945359012112, "clip_ratio/high_mean": 0.0009840688053373015, "clip_ratio/low_mean": 0.0013305551474331878, "clip_ratio/low_min": 5.683884955942631e-05, "clip_ratio/region_mean": 0.002314623910933733, "epoch": 0.9238845144356955, "grad_norm": 0.18903161585330963, "learning_rate": 1e-06, "loss": -0.032, "step": 396 }, { "clip_ratio/high_max": 0.0024143254122463986, "clip_ratio/high_mean": 0.0008168222502717981, "clip_ratio/low_mean": 0.0005513674987014383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001368189728964353, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3614.0, "completions/mean_length": 1145.763427734375, "completions/mean_terminated_length": 626.9553833007812, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 0.926217556138816, "grad_norm": 0.22756241261959076, "learning_rate": 1e-06, "loss": -0.05, "num_tokens": 60705274.0, "reward": 0.5345982313156128, "reward_std": 0.156542107462883, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.499080091714859, "step": 397 }, { "clip_ratio/high_max": 0.002396913645497989, "clip_ratio/high_mean": 0.0009040411860041786, "clip_ratio/low_mean": 0.0007567752163595287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016608163641649298, "epoch": 0.9285505978419364, "grad_norm": 0.1984955072402954, "learning_rate": 1e-06, "loss": -0.0502, "step": 398 }, { "clip_ratio/high_max": 0.002589438794529997, "clip_ratio/high_mean": 0.0009209879790432751, "clip_ratio/low_mean": 0.0009547004647174617, "clip_ratio/low_min": 1.715148209768813e-05, "clip_ratio/region_mean": 0.0018756884637696203, "epoch": 0.9308836395450568, "grad_norm": 0.18810021877288818, "learning_rate": 1e-06, "loss": -0.0503, "step": 399 }, { "clip_ratio/high_max": 0.0030086884726188146, "clip_ratio/high_mean": 0.0010029266068158904, "clip_ratio/low_mean": 0.001136030974521418, "clip_ratio/low_min": 3.430296419537626e-05, "clip_ratio/region_mean": 0.002138957632269012, "epoch": 0.9332166812481774, "grad_norm": 0.1831684708595276, "learning_rate": 1e-06, "loss": -0.0504, "step": 400 }, { "clip_ratio/high_max": 0.0022944446245674044, "clip_ratio/high_mean": 0.0009729608082125196, "clip_ratio/low_mean": 0.0006262494916882133, "clip_ratio/low_min": 2.6870164219872095e-05, "clip_ratio/region_mean": 0.001599210318090627, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3530.0, "completions/mean_length": 968.2835083007812, "completions/mean_terminated_length": 627.6410522460938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.9355497229512978, "grad_norm": 0.2524567246437073, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 61297928.0, "reward": 0.5691964626312256, "reward_std": 0.1988469660282135, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 401 }, { "clip_ratio/high_max": 0.0024275017785839736, "clip_ratio/high_mean": 0.0011202143268747022, "clip_ratio/low_mean": 0.0008954741042543901, "clip_ratio/low_min": 2.6870164219872095e-05, "clip_ratio/region_mean": 0.0020156884420430288, "epoch": 0.9378827646544182, "grad_norm": 0.19105035066604614, "learning_rate": 1e-06, "loss": -0.0508, "step": 402 }, { "clip_ratio/high_max": 0.0025642600739956833, "clip_ratio/high_mean": 0.0011850045411847532, "clip_ratio/low_mean": 0.0010663451394066215, "clip_ratio/low_min": 8.061048720264807e-05, "clip_ratio/region_mean": 0.0022513496660394594, "epoch": 0.9402158063575387, "grad_norm": 0.1914723962545395, "learning_rate": 1e-06, "loss": -0.0509, "step": 403 }, { "clip_ratio/high_max": 0.002771794876025524, "clip_ratio/high_mean": 0.0011238851147936657, "clip_ratio/low_mean": 0.0012456118020054419, "clip_ratio/low_min": 5.5915901612024754e-05, "clip_ratio/region_mean": 0.0023694968404015526, "epoch": 0.9425488480606591, "grad_norm": 0.1938973218202591, "learning_rate": 1e-06, "loss": -0.0511, "step": 404 }, { "clip_ratio/high_max": 0.002230028661870165, "clip_ratio/high_mean": 0.0007739496850263095, "clip_ratio/low_mean": 0.0006409202414943138, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00141486987558892, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3528.0, "completions/mean_length": 962.7857666015625, "completions/mean_terminated_length": 621.5445556640625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 0.9448818897637795, "grad_norm": 0.22243300080299377, "learning_rate": 1e-06, "loss": -0.0346, "num_tokens": 61900048.0, "reward": 0.5167410969734192, "reward_std": 0.155902698636055, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 405 }, { "clip_ratio/high_max": 0.0027140029123984277, "clip_ratio/high_mean": 0.0009580726582498755, "clip_ratio/low_mean": 0.0008908036925276974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018488763671484776, "epoch": 0.9472149314669, "grad_norm": 0.19398583471775055, "learning_rate": 1e-06, "loss": -0.0348, "step": 406 }, { "clip_ratio/high_max": 0.0027486858452903107, "clip_ratio/high_mean": 0.0010101074276462896, "clip_ratio/low_mean": 0.0010566887012828374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020667961143772118, "epoch": 0.9495479731700204, "grad_norm": 0.17073506116867065, "learning_rate": 1e-06, "loss": -0.035, "step": 407 }, { "clip_ratio/high_max": 0.002703230216866359, "clip_ratio/high_mean": 0.0008891025681805331, "clip_ratio/low_mean": 0.0011867632620123914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002075865850201808, "epoch": 0.9518810148731408, "grad_norm": 0.17584951221942902, "learning_rate": 1e-06, "loss": -0.035, "step": 408 }, { "clip_ratio/high_max": 0.0017499868990853429, "clip_ratio/high_mean": 0.0006895441420056159, "clip_ratio/low_mean": 0.0005366919976950157, "clip_ratio/low_min": 2.0552450223476626e-05, "clip_ratio/region_mean": 0.001226236159709515, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3481.0, "completions/mean_length": 1060.05810546875, "completions/mean_terminated_length": 674.3597412109375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.9542140565762613, "grad_norm": 0.21991465985774994, "learning_rate": 1e-06, "loss": -0.0314, "num_tokens": 62523628.0, "reward": 0.5178571939468384, "reward_std": 0.1661185324192047, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 409 }, { "clip_ratio/high_max": 0.002041467319941148, "clip_ratio/high_mean": 0.0008404169966524933, "clip_ratio/low_mean": 0.0008108198853733484, "clip_ratio/low_min": 9.36329615797149e-06, "clip_ratio/region_mean": 0.0016512368674739264, "epoch": 0.9565470982793818, "grad_norm": 0.2058495134115219, "learning_rate": 1e-06, "loss": -0.0316, "step": 410 }, { "clip_ratio/high_max": 0.002102226069837343, "clip_ratio/high_mean": 0.000885735877091065, "clip_ratio/low_mean": 0.0010361661043134518, "clip_ratio/low_min": 3.745318463188596e-05, "clip_ratio/region_mean": 0.0019219019668526016, "epoch": 0.9588801399825022, "grad_norm": 0.16779199242591858, "learning_rate": 1e-06, "loss": -0.0318, "step": 411 }, { "clip_ratio/high_max": 0.002049068563792389, "clip_ratio/high_mean": 0.0008057550767262001, "clip_ratio/low_mean": 0.0011653085093712434, "clip_ratio/low_min": 1.872659231594298e-05, "clip_ratio/region_mean": 0.00197106359701138, "epoch": 0.9612131816856226, "grad_norm": 0.20040445029735565, "learning_rate": 1e-06, "loss": -0.0319, "step": 412 }, { "clip_ratio/high_max": 0.0021113881448400207, "clip_ratio/high_mean": 0.0008135189400491072, "clip_ratio/low_mean": 0.0006807227355238865, "clip_ratio/low_min": 5.56997511012014e-05, "clip_ratio/region_mean": 0.0014942416819394566, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3887.0, "completions/mean_length": 1005.1975708007812, "completions/mean_terminated_length": 625.6253051757812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 0.9635462233887431, "grad_norm": 0.2439853399991989, "learning_rate": 1e-06, "loss": -0.0488, "num_tokens": 63115821.0, "reward": 0.5546875, "reward_std": 0.171687051653862, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 413 }, { "clip_ratio/high_max": 0.0029173197981435806, "clip_ratio/high_mean": 0.001055622829881031, "clip_ratio/low_mean": 0.0009044086964422604, "clip_ratio/low_min": 3.234574705857085e-05, "clip_ratio/region_mean": 0.001960031564522069, "epoch": 0.9658792650918635, "grad_norm": 0.212842658162117, "learning_rate": 1e-06, "loss": -0.0491, "step": 414 }, { "clip_ratio/high_max": 0.002813082253851462, "clip_ratio/high_mean": 0.0010721070702857105, "clip_ratio/low_mean": 0.0011335684757796116, "clip_ratio/low_min": 0.00010438599565532058, "clip_ratio/region_mean": 0.0022056755478843115, "epoch": 0.9682123067949839, "grad_norm": 0.18718186020851135, "learning_rate": 1e-06, "loss": -0.0492, "step": 415 }, { "clip_ratio/high_max": 0.002706474653678015, "clip_ratio/high_mean": 0.0010399333223176654, "clip_ratio/low_mean": 0.0013874257856514305, "clip_ratio/low_min": 0.00010292992737959139, "clip_ratio/region_mean": 0.0024273590970551595, "epoch": 0.9705453484981044, "grad_norm": 0.16945691406726837, "learning_rate": 1e-06, "loss": -0.0493, "step": 416 }, { "clip_ratio/high_max": 0.002532029800931923, "clip_ratio/high_mean": 0.00104540411484777, "clip_ratio/low_mean": 0.0006206272819326841, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001666031384957023, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 929.4297485351562, "completions/mean_terminated_length": 597.5449829101562, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.9728783902012248, "grad_norm": 0.2921268939971924, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 63688326.0, "reward": 0.5401785969734192, "reward_std": 0.19787125289440155, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 417 }, { "clip_ratio/high_max": 0.002880187767004827, "clip_ratio/high_mean": 0.0012010327791358577, "clip_ratio/low_mean": 0.0008800021423667204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020810349014936946, "epoch": 0.9752114319043453, "grad_norm": 0.22427064180374146, "learning_rate": 1e-06, "loss": -0.0273, "step": 418 }, { "clip_ratio/high_max": 0.002787976758554578, "clip_ratio/high_mean": 0.0011979122064076364, "clip_ratio/low_mean": 0.0011710269518516725, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002368939225561917, "epoch": 0.9775444736074658, "grad_norm": 0.22764481604099274, "learning_rate": 1e-06, "loss": -0.0275, "step": 419 }, { "clip_ratio/high_max": 0.0028928016909048893, "clip_ratio/high_mean": 0.0012647529365494847, "clip_ratio/low_mean": 0.0013513580543076387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026161109999520704, "epoch": 0.9798775153105862, "grad_norm": 0.19134294986724854, "learning_rate": 1e-06, "loss": -0.0276, "step": 420 }, { "clip_ratio/high_max": 0.0018266616134496871, "clip_ratio/high_mean": 0.0007190967153292149, "clip_ratio/low_mean": 0.0005109191979499883, "clip_ratio/low_min": 1.6720170606276952e-05, "clip_ratio/region_mean": 0.001230015892360825, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 982.4832763671875, "completions/mean_terminated_length": 621.88916015625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 0.9822105570137066, "grad_norm": 0.20579728484153748, "learning_rate": 1e-06, "loss": -0.0488, "num_tokens": 64269895.0, "reward": 0.574776828289032, "reward_std": 0.14571575820446014, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 421 }, { "clip_ratio/high_max": 0.0022223303021746688, "clip_ratio/high_mean": 0.0008412895622313954, "clip_ratio/low_mean": 0.0006025078419042984, "clip_ratio/low_min": 1.1704119970090687e-05, "clip_ratio/region_mean": 0.0014437974132306408, "epoch": 0.9845435987168271, "grad_norm": 0.1702871322631836, "learning_rate": 1e-06, "loss": -0.0489, "step": 422 }, { "clip_ratio/high_max": 0.002629214672197122, "clip_ratio/high_mean": 0.0008907833580451552, "clip_ratio/low_mean": 0.000756228415411897, "clip_ratio/low_min": 4.724111931864172e-05, "clip_ratio/region_mean": 0.0016470117516291793, "epoch": 0.9868766404199475, "grad_norm": 0.18380087614059448, "learning_rate": 1e-06, "loss": -0.0491, "step": 423 }, { "clip_ratio/high_max": 0.0024758439176366664, "clip_ratio/high_mean": 0.0008649492338008713, "clip_ratio/low_mean": 0.0009179535700241104, "clip_ratio/low_min": 7.022471982054412e-05, "clip_ratio/region_mean": 0.0017829028074629605, "epoch": 0.9892096821230679, "grad_norm": 0.1679142862558365, "learning_rate": 1e-06, "loss": -0.0491, "step": 424 }, { "clip_ratio/high_max": 0.0023570269368065055, "clip_ratio/high_mean": 0.0010959362571156817, "clip_ratio/low_mean": 0.000697734321875032, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017936705917236395, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 995.2154541015625, "completions/mean_terminated_length": 605.6696166992188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.9915427238261884, "grad_norm": 0.310123085975647, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 64848512.0, "reward": 0.5412946939468384, "reward_std": 0.20440296828746796, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 425 }, { "clip_ratio/high_max": 0.0029505515594792087, "clip_ratio/high_mean": 0.0013292095172801055, "clip_ratio/low_mean": 0.001031558427712298, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023607679031556472, "epoch": 0.9938757655293088, "grad_norm": 0.25105151534080505, "learning_rate": 1e-06, "loss": -0.0435, "step": 426 }, { "clip_ratio/high_max": 0.0030129859660519287, "clip_ratio/high_mean": 0.0014149835515127052, "clip_ratio/low_mean": 0.001323643886280479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002738627459621057, "epoch": 0.9962088072324293, "grad_norm": 0.2301369458436966, "learning_rate": 1e-06, "loss": -0.0438, "step": 427 }, { "clip_ratio/high_max": 0.0031693995551904663, "clip_ratio/high_mean": 0.001362594950478524, "clip_ratio/low_mean": 0.0015099316769919824, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028725266965921037, "epoch": 0.9985418489355498, "grad_norm": 0.21407221257686615, "learning_rate": 1e-06, "loss": -0.0439, "step": 428 }, { "clip_ratio/high_max": 0.00188614493526984, "clip_ratio/high_mean": 0.000804677469204762, "clip_ratio/low_mean": 0.0006909416897542542, "clip_ratio/low_min": 1.371215421386296e-05, "clip_ratio/region_mean": 0.0014956191917008255, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3768.0, "completions/mean_length": 850.3594360351562, "completions/mean_terminated_length": 617.4186401367188, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 1.0023330417031204, "grad_norm": 0.2333025187253952, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 65455098.0, "reward": 0.6350446939468384, "reward_std": 0.17867198586463928, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 429 }, { "clip_ratio/high_max": 0.0023995939482119866, "clip_ratio/high_mean": 0.0010416395452921279, "clip_ratio/low_mean": 0.0009332010322395945, "clip_ratio/low_min": 4.335260018706322e-05, "clip_ratio/region_mean": 0.001974840590264648, "epoch": 1.0046660834062409, "grad_norm": 0.2440246194601059, "learning_rate": 1e-06, "loss": -0.0317, "step": 430 }, { "clip_ratio/high_max": 0.0022679026078549214, "clip_ratio/high_mean": 0.0009606390412955079, "clip_ratio/low_mean": 0.0011004831012542127, "clip_ratio/low_min": 1.7936576114152558e-05, "clip_ratio/region_mean": 0.0020611221698345616, "epoch": 1.0069991251093613, "grad_norm": 0.17937007546424866, "learning_rate": 1e-06, "loss": -0.0319, "step": 431 }, { "clip_ratio/high_max": 0.002265503804665059, "clip_ratio/high_mean": 0.0009249858303519432, "clip_ratio/low_mean": 0.0011825941983261146, "clip_ratio/low_min": 8.968288057076279e-06, "clip_ratio/region_mean": 0.0021075799959362485, "epoch": 1.0093321668124817, "grad_norm": 0.18658733367919922, "learning_rate": 1e-06, "loss": -0.032, "step": 432 }, { "clip_ratio/high_max": 0.002188250800827518, "clip_ratio/high_mean": 0.0007850914971641032, "clip_ratio/low_mean": 0.0006092657213230268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013943572448624764, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3200.0, "completions/mean_length": 925.9609985351562, "completions/mean_terminated_length": 589.388916015625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 1.0116652085156022, "grad_norm": 0.24572688341140747, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 66028951.0, "reward": 0.637276828289032, "reward_std": 0.15319034457206726, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 433 }, { "clip_ratio/high_max": 0.0027622638517641462, "clip_ratio/high_mean": 0.0010239535840810277, "clip_ratio/low_mean": 0.0007826659921192913, "clip_ratio/low_min": 2.5562372684362344e-05, "clip_ratio/region_mean": 0.0018066195843857713, "epoch": 1.0139982502187226, "grad_norm": 0.22620150446891785, "learning_rate": 1e-06, "loss": -0.0445, "step": 434 }, { "clip_ratio/high_max": 0.002733622441155603, "clip_ratio/high_mean": 0.0009858197190624196, "clip_ratio/low_mean": 0.0010387497350166086, "clip_ratio/low_min": 1.6297262845910154e-05, "clip_ratio/region_mean": 0.0020245694468030706, "epoch": 1.016331291921843, "grad_norm": 0.18662086129188538, "learning_rate": 1e-06, "loss": -0.0447, "step": 435 }, { "clip_ratio/high_max": 0.002920517210441176, "clip_ratio/high_mean": 0.001070248461473966, "clip_ratio/low_mean": 0.001208086943734088, "clip_ratio/low_min": 3.259452569182031e-05, "clip_ratio/region_mean": 0.00227833535609534, "epoch": 1.0186643336249634, "grad_norm": 0.19832929968833923, "learning_rate": 1e-06, "loss": -0.0448, "step": 436 }, { "clip_ratio/high_max": 0.00233198835485382, "clip_ratio/high_mean": 0.000919369622351951, "clip_ratio/low_mean": 0.0005849066842529282, "clip_ratio/low_min": 2.708247848204337e-05, "clip_ratio/region_mean": 0.0015042763370729517, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 1107.8035888671875, "completions/mean_terminated_length": 645.7113037109375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.020997375328084, "grad_norm": 0.25459355115890503, "learning_rate": 1e-06, "loss": -0.0649, "num_tokens": 66623023.0, "reward": 0.5334821939468384, "reward_std": 0.1828376203775406, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 437 }, { "clip_ratio/high_max": 0.0032297305806423537, "clip_ratio/high_mean": 0.0012008415415039053, "clip_ratio/low_mean": 0.0008006979960555327, "clip_ratio/low_min": 2.0398172637214884e-05, "clip_ratio/region_mean": 0.00200153960759053, "epoch": 1.0233304170312045, "grad_norm": 0.22269797325134277, "learning_rate": 1e-06, "loss": -0.0652, "step": 438 }, { "clip_ratio/high_max": 0.0029371954951784573, "clip_ratio/high_mean": 0.0011158537872688612, "clip_ratio/low_mean": 0.0009924400619638618, "clip_ratio/low_min": 5.387098099163268e-05, "clip_ratio/region_mean": 0.0021082937964820303, "epoch": 1.025663458734325, "grad_norm": 0.17895840108394623, "learning_rate": 1e-06, "loss": -0.0654, "step": 439 }, { "clip_ratio/high_max": 0.0031041721085784957, "clip_ratio/high_mean": 0.0011557863945199642, "clip_ratio/low_mean": 0.0011620121422311058, "clip_ratio/low_min": 6.109514288255014e-05, "clip_ratio/region_mean": 0.0023177985203801654, "epoch": 1.0279965004374454, "grad_norm": 0.1888708770275116, "learning_rate": 1e-06, "loss": -0.0655, "step": 440 }, { "clip_ratio/high_max": 0.002363800063903909, "clip_ratio/high_mean": 0.000765284659792087, "clip_ratio/low_mean": 0.0006764551799278706, "clip_ratio/low_min": 2.4149921955540776e-05, "clip_ratio/region_mean": 0.00144173986336682, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3328.0, "completions/mean_length": 1028.1195068359375, "completions/mean_terminated_length": 607.6484375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 1.0303295421405658, "grad_norm": 0.29041218757629395, "learning_rate": 1e-06, "loss": -0.0467, "num_tokens": 67206226.0, "reward": 0.5055803656578064, "reward_std": 0.1731078028678894, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 441 }, { "clip_ratio/high_max": 0.0028002179751638323, "clip_ratio/high_mean": 0.0009567866181896534, "clip_ratio/low_mean": 0.0009720387351990212, "clip_ratio/low_min": 1.2074960977770388e-05, "clip_ratio/region_mean": 0.0019288253606646322, "epoch": 1.0326625838436863, "grad_norm": 0.21335922181606293, "learning_rate": 1e-06, "loss": -0.047, "step": 442 }, { "clip_ratio/high_max": 0.002605081615911331, "clip_ratio/high_mean": 0.0009598919823474716, "clip_ratio/low_mean": 0.001113256159442244, "clip_ratio/low_min": 1.4397603990801144e-05, "clip_ratio/region_mean": 0.002073148156341631, "epoch": 1.0349956255468067, "grad_norm": 0.1877458244562149, "learning_rate": 1e-06, "loss": -0.0472, "step": 443 }, { "clip_ratio/high_max": 0.002722404162341263, "clip_ratio/high_mean": 0.0009160343215626199, "clip_ratio/low_mean": 0.001470162289479049, "clip_ratio/low_min": 5.759041596320458e-05, "clip_ratio/region_mean": 0.0023861965819378383, "epoch": 1.0373286672499271, "grad_norm": 0.17515434324741364, "learning_rate": 1e-06, "loss": -0.0473, "step": 444 }, { "clip_ratio/high_max": 0.0018894981767516583, "clip_ratio/high_mean": 0.0007696930078964215, "clip_ratio/low_mean": 0.0004294440727790061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011991370738542173, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3809.0, "completions/mean_length": 1033.2054443359375, "completions/mean_terminated_length": 591.1928100585938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 1.0396617089530475, "grad_norm": 0.25584280490875244, "learning_rate": 1e-06, "loss": -0.0647, "num_tokens": 67763706.0, "reward": 0.6205357313156128, "reward_std": 0.16398167610168457, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 445 }, { "clip_ratio/high_max": 0.0025248981764889322, "clip_ratio/high_mean": 0.0010226915183011442, "clip_ratio/low_mean": 0.0007047745557429153, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017274660640396178, "epoch": 1.041994750656168, "grad_norm": 0.20202475786209106, "learning_rate": 1e-06, "loss": -0.0649, "step": 446 }, { "clip_ratio/high_max": 0.0024538811994716525, "clip_ratio/high_mean": 0.0010547231504460797, "clip_ratio/low_mean": 0.0008309453060064698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018856684473576024, "epoch": 1.0443277923592884, "grad_norm": 0.17896480858325958, "learning_rate": 1e-06, "loss": -0.0651, "step": 447 }, { "clip_ratio/high_max": 0.0026427094198879786, "clip_ratio/high_mean": 0.0010373942131991498, "clip_ratio/low_mean": 0.0009970391201932216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020344333242974244, "epoch": 1.0466608340624088, "grad_norm": 0.17334654927253723, "learning_rate": 1e-06, "loss": -0.0652, "step": 448 }, { "clip_ratio/high_max": 0.0017308068145212019, "clip_ratio/high_mean": 0.0006800707551519736, "clip_ratio/low_mean": 0.0004515814352998859, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00113165218863287, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3804.0, "completions/mean_length": 1005.1350708007812, "completions/mean_terminated_length": 603.6734008789062, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 1.0489938757655293, "grad_norm": 0.2161651849746704, "learning_rate": 1e-06, "loss": -0.0636, "num_tokens": 68333051.0, "reward": 0.5703125, "reward_std": 0.13084182143211365, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 449 }, { "clip_ratio/high_max": 0.0022399268855224364, "clip_ratio/high_mean": 0.0007800824223522795, "clip_ratio/low_mean": 0.0006723454164330178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001452427837648429, "epoch": 1.0513269174686497, "grad_norm": 0.16871459782123566, "learning_rate": 1e-06, "loss": -0.0639, "step": 450 }, { "clip_ratio/high_max": 0.0026769223622977734, "clip_ratio/high_mean": 0.0008887108624549, "clip_ratio/low_mean": 0.000769434676385572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016581455092818942, "epoch": 1.0536599591717701, "grad_norm": 0.17597126960754395, "learning_rate": 1e-06, "loss": -0.0639, "step": 451 }, { "clip_ratio/high_max": 0.0022117251392046455, "clip_ratio/high_mean": 0.0008225946094171377, "clip_ratio/low_mean": 0.0008988589370346745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017214535691891797, "epoch": 1.0559930008748906, "grad_norm": 0.16394490003585815, "learning_rate": 1e-06, "loss": -0.064, "step": 452 }, { "clip_ratio/high_max": 0.001844191509007942, "clip_ratio/high_mean": 0.0006051035788914305, "clip_ratio/low_mean": 0.0005907940394536126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011958976356254425, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2765.0, "completions/mean_length": 1068.271240234375, "completions/mean_terminated_length": 613.5289306640625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 1.058326042578011, "grad_norm": 0.24267460405826569, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 68922438.0, "reward": 0.5189732313156128, "reward_std": 0.16856057941913605, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 453 }, { "clip_ratio/high_max": 0.002777697729470674, "clip_ratio/high_mean": 0.0010148937635676702, "clip_ratio/low_mean": 0.0007792864762450336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001794180272554513, "epoch": 1.0606590842811314, "grad_norm": 0.2198466807603836, "learning_rate": 1e-06, "loss": -0.0553, "step": 454 }, { "clip_ratio/high_max": 0.0028655557398451492, "clip_ratio/high_mean": 0.001007710066915024, "clip_ratio/low_mean": 0.0010238405156997032, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020315505607868545, "epoch": 1.0629921259842519, "grad_norm": 0.18871402740478516, "learning_rate": 1e-06, "loss": -0.0555, "step": 455 }, { "clip_ratio/high_max": 0.0023621154177817516, "clip_ratio/high_mean": 0.0009254372853320092, "clip_ratio/low_mean": 0.0011858456418849528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021112828471814282, "epoch": 1.0653251676873725, "grad_norm": 0.1881953328847885, "learning_rate": 1e-06, "loss": -0.0556, "step": 456 }, { "clip_ratio/high_max": 0.0020752472410094924, "clip_ratio/high_mean": 0.0008432522045040969, "clip_ratio/low_mean": 0.0003859125345115899, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012291647362872027, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 985.64404296875, "completions/mean_terminated_length": 612.4012451171875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.067658209390493, "grad_norm": 0.2506106197834015, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 69500271.0, "reward": 0.5558035969734192, "reward_std": 0.15361177921295166, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 457 }, { "clip_ratio/high_max": 0.002430212880426552, "clip_ratio/high_mean": 0.0009447149423067458, "clip_ratio/low_mean": 0.0006093982910897466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001554113237943966, "epoch": 1.0699912510936134, "grad_norm": 0.19296887516975403, "learning_rate": 1e-06, "loss": -0.0382, "step": 458 }, { "clip_ratio/high_max": 0.0026876567644649185, "clip_ratio/high_mean": 0.0010661713204171974, "clip_ratio/low_mean": 0.0007335922709899023, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017997636023210362, "epoch": 1.0723242927967338, "grad_norm": 0.21186336874961853, "learning_rate": 1e-06, "loss": -0.0383, "step": 459 }, { "clip_ratio/high_max": 0.0026717531436588615, "clip_ratio/high_mean": 0.0010539452923694625, "clip_ratio/low_mean": 0.0009490556058153743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002003000867262017, "epoch": 1.0746573344998542, "grad_norm": 0.1976313292980194, "learning_rate": 1e-06, "loss": -0.0384, "step": 460 }, { "clip_ratio/high_max": 0.0017274421516049188, "clip_ratio/high_mean": 0.0006933596951057552, "clip_ratio/low_mean": 0.0006798601580157992, "clip_ratio/low_min": 4.327072747400962e-05, "clip_ratio/region_mean": 0.0013732198531215545, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3699.0, "completions/mean_length": 932.0457763671875, "completions/mean_terminated_length": 617.5914306640625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 1.0769903762029747, "grad_norm": 0.270966500043869, "learning_rate": 1e-06, "loss": -0.0645, "num_tokens": 70091448.0, "reward": 0.6383928656578064, "reward_std": 0.15631499886512756, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 461 }, { "clip_ratio/high_max": 0.0022845575695100706, "clip_ratio/high_mean": 0.0008984553369373316, "clip_ratio/low_mean": 0.0008425280557275983, "clip_ratio/low_min": 2.961796326417243e-05, "clip_ratio/region_mean": 0.0017409833853889722, "epoch": 1.079323417906095, "grad_norm": 0.1994178146123886, "learning_rate": 1e-06, "loss": -0.0647, "step": 462 }, { "clip_ratio/high_max": 0.002211950828495901, "clip_ratio/high_mean": 0.0008875480789356516, "clip_ratio/low_mean": 0.0010095132511196425, "clip_ratio/low_min": 5.4537522373721004e-05, "clip_ratio/region_mean": 0.0018970613418787252, "epoch": 1.0816564596092155, "grad_norm": 0.18105797469615936, "learning_rate": 1e-06, "loss": -0.0648, "step": 463 }, { "clip_ratio/high_max": 0.0021518303838092834, "clip_ratio/high_mean": 0.0008788373797870008, "clip_ratio/low_mean": 0.0012122353618906345, "clip_ratio/low_min": 5.429171142168343e-05, "clip_ratio/region_mean": 0.002091072739858646, "epoch": 1.083989501312336, "grad_norm": 0.16402515769004822, "learning_rate": 1e-06, "loss": -0.065, "step": 464 }, { "clip_ratio/high_max": 0.0024228825350292027, "clip_ratio/high_mean": 0.0010354480000387412, "clip_ratio/low_mean": 0.0005089695328024391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015444175223819911, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2188.0, "completions/mean_length": 959.919677734375, "completions/mean_terminated_length": 605.4061889648438, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 1.0863225430154564, "grad_norm": 0.28489306569099426, "learning_rate": 1e-06, "loss": -0.0359, "num_tokens": 70663920.0, "reward": 0.5948660969734192, "reward_std": 0.19050613045692444, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 465 }, { "clip_ratio/high_max": 0.002697384203202091, "clip_ratio/high_mean": 0.0011934083595406264, "clip_ratio/low_mean": 0.0007661204526812071, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019595287885749713, "epoch": 1.0886555847185768, "grad_norm": 0.2708839774131775, "learning_rate": 1e-06, "loss": -0.0362, "step": 466 }, { "clip_ratio/high_max": 0.00271049264119938, "clip_ratio/high_mean": 0.00129466382713872, "clip_ratio/low_mean": 0.0009820834166021086, "clip_ratio/low_min": 2.2329402781906538e-05, "clip_ratio/region_mean": 0.002276747276482638, "epoch": 1.0909886264216972, "grad_norm": 0.20637908577919006, "learning_rate": 1e-06, "loss": -0.0364, "step": 467 }, { "clip_ratio/high_max": 0.002703095513425069, "clip_ratio/high_mean": 0.0012898474706162233, "clip_ratio/low_mean": 0.0012216093127790373, "clip_ratio/low_min": 2.2329402781906538e-05, "clip_ratio/region_mean": 0.002511456848878879, "epoch": 1.0933216681248177, "grad_norm": 0.2538661062717438, "learning_rate": 1e-06, "loss": -0.0365, "step": 468 }, { "clip_ratio/high_max": 0.002326559078937862, "clip_ratio/high_mean": 0.0008292814873129828, "clip_ratio/low_mean": 0.0004958757281201542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013251571945147589, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3780.0, "completions/mean_length": 1031.818115234375, "completions/mean_terminated_length": 638.1826171875, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 1.0956547098279381, "grad_norm": 0.23984180390834808, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 71244989.0, "reward": 0.640625, "reward_std": 0.15570516884326935, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 469 }, { "clip_ratio/high_max": 0.0030103095414233394, "clip_ratio/high_mean": 0.0009939269912138116, "clip_ratio/low_mean": 0.0006345219062495744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016284488810924813, "epoch": 1.0979877515310585, "grad_norm": 0.20683689415454865, "learning_rate": 1e-06, "loss": -0.0456, "step": 470 }, { "clip_ratio/high_max": 0.0025437069343752228, "clip_ratio/high_mean": 0.0008786136968410574, "clip_ratio/low_mean": 0.0008079185627138941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016865322904777713, "epoch": 1.100320793234179, "grad_norm": 0.18974076211452484, "learning_rate": 1e-06, "loss": -0.0458, "step": 471 }, { "clip_ratio/high_max": 0.0029568531972472556, "clip_ratio/high_mean": 0.0009815776320465375, "clip_ratio/low_mean": 0.000939180177738308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019207578152418137, "epoch": 1.1026538349372994, "grad_norm": 0.18512237071990967, "learning_rate": 1e-06, "loss": -0.0458, "step": 472 }, { "clip_ratio/high_max": 0.0018132032637367956, "clip_ratio/high_mean": 0.0007385272347164573, "clip_ratio/low_mean": 0.000592039981711423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013305672109709121, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3465.0, "completions/mean_length": 925.0803833007812, "completions/mean_terminated_length": 601.35791015625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 1.10498687664042, "grad_norm": 0.2671792209148407, "learning_rate": 1e-06, "loss": -0.047, "num_tokens": 71807517.0, "reward": 0.6261160969734192, "reward_std": 0.15503577888011932, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 473 }, { "clip_ratio/high_max": 0.0022711845631420147, "clip_ratio/high_mean": 0.000970492787018884, "clip_ratio/low_mean": 0.0007535526510764612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017240454253624193, "epoch": 1.1073199183435405, "grad_norm": 0.2036406397819519, "learning_rate": 1e-06, "loss": -0.0471, "step": 474 }, { "clip_ratio/high_max": 0.0024492949814884923, "clip_ratio/high_mean": 0.0009340437973150983, "clip_ratio/low_mean": 0.0009111156687140465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018451594587531872, "epoch": 1.109652960046661, "grad_norm": 0.1986713707447052, "learning_rate": 1e-06, "loss": -0.0473, "step": 475 }, { "clip_ratio/high_max": 0.0025339678759337403, "clip_ratio/high_mean": 0.000984592094027903, "clip_ratio/low_mean": 0.0010988305184582714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020834225942962803, "epoch": 1.1119860017497813, "grad_norm": 0.17704980075359344, "learning_rate": 1e-06, "loss": -0.0474, "step": 476 }, { "clip_ratio/high_max": 0.0026995309817721136, "clip_ratio/high_mean": 0.0010780071061162744, "clip_ratio/low_mean": 0.0005695656591342413, "clip_ratio/low_min": 9.328357919002883e-06, "clip_ratio/region_mean": 0.0016475728079967666, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 1060.641845703125, "completions/mean_terminated_length": 662.059326171875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.1143190434529018, "grad_norm": 0.26159003376960754, "learning_rate": 1e-06, "loss": -0.0551, "num_tokens": 72428508.0, "reward": 0.5703125, "reward_std": 0.2054987996816635, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 477 }, { "clip_ratio/high_max": 0.0029697629215661436, "clip_ratio/high_mean": 0.0012597079148690682, "clip_ratio/low_mean": 0.0008002120393939549, "clip_ratio/low_min": 2.153192508558277e-05, "clip_ratio/region_mean": 0.0020599199342541397, "epoch": 1.1166520851560222, "grad_norm": 0.21325941383838654, "learning_rate": 1e-06, "loss": -0.0554, "step": 478 }, { "clip_ratio/high_max": 0.002954839372250717, "clip_ratio/high_mean": 0.0012681778644036967, "clip_ratio/low_mean": 0.0009019180924951797, "clip_ratio/low_min": 1.0257672329316847e-05, "clip_ratio/region_mean": 0.0021700959332520142, "epoch": 1.1189851268591426, "grad_norm": 0.2004043459892273, "learning_rate": 1e-06, "loss": -0.0555, "step": 479 }, { "clip_ratio/high_max": 0.0030141370443743654, "clip_ratio/high_mean": 0.0012908005010103807, "clip_ratio/low_mean": 0.0012389729054120835, "clip_ratio/low_min": 3.0773018806939945e-05, "clip_ratio/region_mean": 0.002529773410060443, "epoch": 1.121318168562263, "grad_norm": 0.18501633405685425, "learning_rate": 1e-06, "loss": -0.0557, "step": 480 }, { "clip_ratio/high_max": 0.002389666642557131, "clip_ratio/high_mean": 0.0007511532421631273, "clip_ratio/low_mean": 0.0006754465671292564, "clip_ratio/low_min": 1.4688601368106902e-05, "clip_ratio/region_mean": 0.0014265998215705622, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3584.0, "completions/mean_length": 1110.560302734375, "completions/mean_terminated_length": 648.894287109375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 1.1236512102653835, "grad_norm": 0.27346184849739075, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 73029250.0, "reward": 0.5212053656578064, "reward_std": 0.15770326554775238, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982911348342896, "step": 481 }, { "clip_ratio/high_max": 0.0030613427225034684, "clip_ratio/high_mean": 0.0010165627209062222, "clip_ratio/low_mean": 0.0008935898658819497, "clip_ratio/low_min": 5.8754405472427607e-05, "clip_ratio/region_mean": 0.001910152604978066, "epoch": 1.125984251968504, "grad_norm": 0.2034171223640442, "learning_rate": 1e-06, "loss": -0.0271, "step": 482 }, { "clip_ratio/high_max": 0.0025401779857929796, "clip_ratio/high_mean": 0.0009724038245622069, "clip_ratio/low_mean": 0.0010596897463983623, "clip_ratio/low_min": 2.6282590624759905e-05, "clip_ratio/region_mean": 0.0020320935982454102, "epoch": 1.1283172936716244, "grad_norm": 0.1914975643157959, "learning_rate": 1e-06, "loss": -0.0272, "step": 483 }, { "clip_ratio/high_max": 0.003259218472521752, "clip_ratio/high_mean": 0.0010540255243540742, "clip_ratio/low_mean": 0.001212483823110233, "clip_ratio/low_min": 4.4065804104320705e-05, "clip_ratio/region_mean": 0.0022665093711111695, "epoch": 1.1306503353747448, "grad_norm": 0.22389191389083862, "learning_rate": 1e-06, "loss": -0.0273, "step": 484 }, { "clip_ratio/high_max": 0.0021428443360491656, "clip_ratio/high_mean": 0.0008868116710800678, "clip_ratio/low_mean": 0.0006069602713978384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014937718988221604, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2847.0, "completions/mean_length": 958.5491333007812, "completions/mean_terminated_length": 595.1830444335938, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 1.1329833770778652, "grad_norm": 0.2494707852602005, "learning_rate": 1e-06, "loss": -0.0271, "num_tokens": 73585750.0, "reward": 0.640625, "reward_std": 0.16871324181556702, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 485 }, { "clip_ratio/high_max": 0.002578438194177579, "clip_ratio/high_mean": 0.0010743611092038918, "clip_ratio/low_mean": 0.0008911755230656127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019655366268125363, "epoch": 1.1353164187809857, "grad_norm": 0.2744096517562866, "learning_rate": 1e-06, "loss": -0.0273, "step": 486 }, { "clip_ratio/high_max": 0.003209933507605456, "clip_ratio/high_mean": 0.001222103772306582, "clip_ratio/low_mean": 0.0009726033858896699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021947071436443366, "epoch": 1.137649460484106, "grad_norm": 0.29313361644744873, "learning_rate": 1e-06, "loss": -0.0275, "step": 487 }, { "clip_ratio/high_max": 0.0030149374433676712, "clip_ratio/high_mean": 0.001209986636240501, "clip_ratio/low_mean": 0.0013064717168163043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025164583494188264, "epoch": 1.1399825021872265, "grad_norm": 0.19453810155391693, "learning_rate": 1e-06, "loss": -0.0277, "step": 488 }, { "clip_ratio/high_max": 0.002624071013997309, "clip_ratio/high_mean": 0.0009458125041419407, "clip_ratio/low_mean": 0.0006403016905096592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015861142237554304, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 1115.6239013671875, "completions/mean_terminated_length": 698.5228881835938, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.142315543890347, "grad_norm": 0.24084880948066711, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 74224877.0, "reward": 0.5948660969734192, "reward_std": 0.1831425130367279, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 489 }, { "clip_ratio/high_max": 0.0029585360607597977, "clip_ratio/high_mean": 0.0010731726724770851, "clip_ratio/low_mean": 0.0008471020973956911, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019202747862436809, "epoch": 1.1446485855934676, "grad_norm": 0.21253973245620728, "learning_rate": 1e-06, "loss": -0.0433, "step": 490 }, { "clip_ratio/high_max": 0.003574863643734716, "clip_ratio/high_mean": 0.0012558270209410693, "clip_ratio/low_mean": 0.0009229206416421221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002178747636207845, "epoch": 1.1469816272965878, "grad_norm": 0.1913764625787735, "learning_rate": 1e-06, "loss": -0.0435, "step": 491 }, { "clip_ratio/high_max": 0.0031777483600308187, "clip_ratio/high_mean": 0.0012014315143460408, "clip_ratio/low_mean": 0.001132662691816222, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023340941916103475, "epoch": 1.1493146689997085, "grad_norm": 0.18061964213848114, "learning_rate": 1e-06, "loss": -0.0435, "step": 492 }, { "clip_ratio/high_max": 0.0020101828704355285, "clip_ratio/high_mean": 0.0007751140274194768, "clip_ratio/low_mean": 0.00044571058697329136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012208246043883264, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 1004.3839721679688, "completions/mean_terminated_length": 659.166259765625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 1.151647710702829, "grad_norm": 0.22036553919315338, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 74845677.0, "reward": 0.6350446939468384, "reward_std": 0.14948049187660217, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.4816865026950836, "step": 493 }, { "clip_ratio/high_max": 0.0022590553999179974, "clip_ratio/high_mean": 0.0009336157945654122, "clip_ratio/low_mean": 0.0006544762891280698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001588092025485821, "epoch": 1.1539807524059493, "grad_norm": 0.18672379851341248, "learning_rate": 1e-06, "loss": -0.0329, "step": 494 }, { "clip_ratio/high_max": 0.0024380449067393783, "clip_ratio/high_mean": 0.0010128747944690986, "clip_ratio/low_mean": 0.0007495823338103946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017624571337364614, "epoch": 1.1563137941090698, "grad_norm": 0.22866769134998322, "learning_rate": 1e-06, "loss": -0.033, "step": 495 }, { "clip_ratio/high_max": 0.0025246197037631646, "clip_ratio/high_mean": 0.0009747100557433441, "clip_ratio/low_mean": 0.000957716760240146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019324268505442888, "epoch": 1.1586468358121902, "grad_norm": 0.16688209772109985, "learning_rate": 1e-06, "loss": -0.0331, "step": 496 }, { "clip_ratio/high_max": 0.002160491898393957, "clip_ratio/high_mean": 0.0007186983257270185, "clip_ratio/low_mean": 0.0007738161439192481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014925144787412137, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 1202.20654296875, "completions/mean_terminated_length": 661.7761840820312, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 1.1609798775153106, "grad_norm": 0.24846456944942474, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 75435662.0, "reward": 0.5234375, "reward_std": 0.13575772941112518, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 497 }, { "clip_ratio/high_max": 0.0026769454598252196, "clip_ratio/high_mean": 0.0008719501320229028, "clip_ratio/low_mean": 0.00085258467879612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001724534831737401, "epoch": 1.163312919218431, "grad_norm": 0.22556813061237335, "learning_rate": 1e-06, "loss": -0.0507, "step": 498 }, { "clip_ratio/high_max": 0.003002273697347846, "clip_ratio/high_mean": 0.0009335558734164806, "clip_ratio/low_mean": 0.0011308597659080988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020644156757043675, "epoch": 1.1656459609215515, "grad_norm": 0.18102315068244934, "learning_rate": 1e-06, "loss": -0.0509, "step": 499 }, { "clip_ratio/high_max": 0.0029013332896283828, "clip_ratio/high_mean": 0.0009207827542923042, "clip_ratio/low_mean": 0.0012869851234427188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022077678513596766, "epoch": 1.167979002624672, "grad_norm": 0.16825012862682343, "learning_rate": 1e-06, "loss": -0.051, "step": 500 }, { "clip_ratio/high_max": 0.0017726223231875338, "clip_ratio/high_mean": 0.0006842501479695784, "clip_ratio/low_mean": 0.00048078535655804444, "clip_ratio/low_min": 8.508031896781176e-06, "clip_ratio/region_mean": 0.0011650355045276228, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3630.0, "completions/mean_length": 984.5201416015625, "completions/mean_terminated_length": 632.7875366210938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 1.1703120443277923, "grad_norm": 0.24229595065116882, "learning_rate": 1e-06, "loss": -0.0404, "num_tokens": 76033536.0, "reward": 0.6104910969734192, "reward_std": 0.15480685234069824, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 501 }, { "clip_ratio/high_max": 0.0024169529206119478, "clip_ratio/high_mean": 0.0010063247500511352, "clip_ratio/low_mean": 0.0006739567579643335, "clip_ratio/low_min": 2.0223264073138125e-05, "clip_ratio/region_mean": 0.0016802815152914263, "epoch": 1.1726450860309128, "grad_norm": 0.2216462790966034, "learning_rate": 1e-06, "loss": -0.0406, "step": 502 }, { "clip_ratio/high_max": 0.0023699274461250752, "clip_ratio/high_mean": 0.0009497327700955793, "clip_ratio/low_mean": 0.0008968771453510271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018466099500074051, "epoch": 1.1749781277340332, "grad_norm": 0.1699136197566986, "learning_rate": 1e-06, "loss": -0.0408, "step": 503 }, { "clip_ratio/high_max": 0.002410533001238946, "clip_ratio/high_mean": 0.0009472103756706929, "clip_ratio/low_mean": 0.0010392735275672749, "clip_ratio/low_min": 2.691065674298443e-05, "clip_ratio/region_mean": 0.001986483890505042, "epoch": 1.1773111694371536, "grad_norm": 0.18011756241321564, "learning_rate": 1e-06, "loss": -0.0409, "step": 504 }, { "clip_ratio/high_max": 0.0017886991263367236, "clip_ratio/high_mean": 0.0007002090806054184, "clip_ratio/low_mean": 0.0007003072787483688, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001400516335706925, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 1028.407470703125, "completions/mean_terminated_length": 585.702392578125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 1.179644211140274, "grad_norm": 0.27300500869750977, "learning_rate": 1e-06, "loss": -0.0383, "num_tokens": 76574741.0, "reward": 0.6350446939468384, "reward_std": 0.16183637082576752, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 505 }, { "clip_ratio/high_max": 0.0026057456525450107, "clip_ratio/high_mean": 0.0010075661084556486, "clip_ratio/low_mean": 0.001023779108436429, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020313452623668127, "epoch": 1.1819772528433945, "grad_norm": 0.2584632933139801, "learning_rate": 1e-06, "loss": -0.0386, "step": 506 }, { "clip_ratio/high_max": 0.0027077028789790347, "clip_ratio/high_mean": 0.0010219967316515977, "clip_ratio/low_mean": 0.0011785405349655775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022005372447893023, "epoch": 1.184310294546515, "grad_norm": 0.21871411800384521, "learning_rate": 1e-06, "loss": -0.0388, "step": 507 }, { "clip_ratio/high_max": 0.002569734613643959, "clip_ratio/high_mean": 0.0009420320166100282, "clip_ratio/low_mean": 0.0014475440202659229, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023895760532468557, "epoch": 1.1866433362496354, "grad_norm": 0.21576610207557678, "learning_rate": 1e-06, "loss": -0.0389, "step": 508 }, { "clip_ratio/high_max": 0.0026465314876986668, "clip_ratio/high_mean": 0.0011771513309213333, "clip_ratio/low_mean": 0.0005883481826458592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017654995463090017, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 1036.7489013671875, "completions/mean_terminated_length": 621.8694458007812, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.188976377952756, "grad_norm": 0.2937774658203125, "learning_rate": 1e-06, "loss": -0.0722, "num_tokens": 77157316.0, "reward": 0.5792410969734192, "reward_std": 0.19799211621284485, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 509 }, { "clip_ratio/high_max": 0.0030839903265587054, "clip_ratio/high_mean": 0.0013798255131405313, "clip_ratio/low_mean": 0.0008560552560084034, "clip_ratio/low_min": 3.3068783523049206e-05, "clip_ratio/region_mean": 0.0022358807545970194, "epoch": 1.1913094196558764, "grad_norm": 0.25443971157073975, "learning_rate": 1e-06, "loss": -0.0725, "step": 510 }, { "clip_ratio/high_max": 0.0034403286554152146, "clip_ratio/high_mean": 0.0014830293293925934, "clip_ratio/low_mean": 0.001141271259257337, "clip_ratio/low_min": 4.0102662751451135e-05, "clip_ratio/region_mean": 0.0026243005704600364, "epoch": 1.1936424613589969, "grad_norm": 0.19449834525585175, "learning_rate": 1e-06, "loss": -0.0727, "step": 511 }, { "clip_ratio/high_max": 0.003032838176295627, "clip_ratio/high_mean": 0.001370453795971116, "clip_ratio/low_mean": 0.0011973431919614086, "clip_ratio/low_min": 7.449344411725178e-05, "clip_ratio/region_mean": 0.0025677970043034293, "epoch": 1.1959755030621173, "grad_norm": 0.20635251700878143, "learning_rate": 1e-06, "loss": -0.0728, "step": 512 }, { "clip_ratio/high_max": 0.0024143223017745186, "clip_ratio/high_mean": 0.0009924522710207384, "clip_ratio/low_mean": 0.0007360769614024321, "clip_ratio/low_min": 1.7293858036282472e-05, "clip_ratio/region_mean": 0.001728529248794075, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3750.0, "completions/mean_length": 1060.2254638671875, "completions/mean_terminated_length": 657.2465209960938, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.1983085447652377, "grad_norm": 0.2654973864555359, "learning_rate": 1e-06, "loss": -0.0526, "num_tokens": 77770286.0, "reward": 0.5680803656578064, "reward_std": 0.1995958387851715, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 513 }, { "clip_ratio/high_max": 0.0024228927577496506, "clip_ratio/high_mean": 0.0010833551295945654, "clip_ratio/low_mean": 0.0010713286137615796, "clip_ratio/low_min": 9.454540486331098e-05, "clip_ratio/region_mean": 0.0021546837378991768, "epoch": 1.2006415864683582, "grad_norm": 0.22051560878753662, "learning_rate": 1e-06, "loss": -0.053, "step": 514 }, { "clip_ratio/high_max": 0.0030360161181306466, "clip_ratio/high_mean": 0.0011675423866108758, "clip_ratio/low_mean": 0.0012180573357909452, "clip_ratio/low_min": 0.00010423983803775627, "clip_ratio/region_mean": 0.002385599735134747, "epoch": 1.2029746281714786, "grad_norm": 0.20579476654529572, "learning_rate": 1e-06, "loss": -0.0531, "step": 515 }, { "clip_ratio/high_max": 0.002736599933996331, "clip_ratio/high_mean": 0.0011807907576439902, "clip_ratio/low_mean": 0.0014078295971557964, "clip_ratio/low_min": 9.662347838457208e-05, "clip_ratio/region_mean": 0.0025886203802656382, "epoch": 1.205307669874599, "grad_norm": 0.2896462082862854, "learning_rate": 1e-06, "loss": -0.0533, "step": 516 }, { "clip_ratio/high_max": 0.002434489084407687, "clip_ratio/high_mean": 0.001015359470329713, "clip_ratio/low_mean": 0.0005827424547533155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001598101960553322, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 1130.09375, "completions/mean_terminated_length": 689.01025390625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 1.2076407115777195, "grad_norm": 0.46300458908081055, "learning_rate": 1e-06, "loss": -0.0369, "num_tokens": 78400858.0, "reward": 0.5658482313156128, "reward_std": 0.17829221487045288, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 517 }, { "clip_ratio/high_max": 0.0025428009321331047, "clip_ratio/high_mean": 0.0011331242694723187, "clip_ratio/low_mean": 0.0012312159096836695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002364340180065483, "epoch": 1.20997375328084, "grad_norm": 0.6013938784599304, "learning_rate": 1e-06, "loss": -0.0371, "step": 518 }, { "clip_ratio/high_max": 0.0031117841717787087, "clip_ratio/high_mean": 0.001198860463773599, "clip_ratio/low_mean": 0.002348972273466643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0035478327226883266, "epoch": 1.2123067949839603, "grad_norm": 0.3071836829185486, "learning_rate": 1e-06, "loss": -0.0375, "step": 519 }, { "clip_ratio/high_max": 0.002819619156070985, "clip_ratio/high_mean": 0.0012652650621021166, "clip_ratio/low_mean": 0.002037731521340902, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.003302996585262008, "epoch": 1.2146398366870808, "grad_norm": 2.0215115547180176, "learning_rate": 1e-06, "loss": -0.0354, "step": 520 }, { "clip_ratio/high_max": 0.002378325239988044, "clip_ratio/high_mean": 0.0008747008068894502, "clip_ratio/low_mean": 0.0004809233500964183, "clip_ratio/low_min": 1.7477628716733307e-05, "clip_ratio/region_mean": 0.001355624190182425, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3359.0, "completions/mean_length": 879.5569458007812, "completions/mean_terminated_length": 594.2588500976562, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.2169728783902012, "grad_norm": 0.29543742537498474, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 78975085.0, "reward": 0.5892857313156128, "reward_std": 0.15221282839775085, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 521 }, { "clip_ratio/high_max": 0.002576551472884603, "clip_ratio/high_mean": 0.0009847943820204819, "clip_ratio/low_mean": 0.0007131154156923003, "clip_ratio/low_min": 2.307124486833345e-05, "clip_ratio/region_mean": 0.0016979097708826885, "epoch": 1.2193059200933216, "grad_norm": 0.22369630634784698, "learning_rate": 1e-06, "loss": -0.0416, "step": 522 }, { "clip_ratio/high_max": 0.0028981081777601503, "clip_ratio/high_mean": 0.0010226434642390814, "clip_ratio/low_mean": 0.0008572595120313053, "clip_ratio/low_min": 3.495525743346661e-05, "clip_ratio/region_mean": 0.0018799030040099751, "epoch": 1.221638961796442, "grad_norm": 0.19105824828147888, "learning_rate": 1e-06, "loss": -0.0418, "step": 523 }, { "clip_ratio/high_max": 0.0028165603798697703, "clip_ratio/high_mean": 0.0010105444034707034, "clip_ratio/low_mean": 0.0009608435766494949, "clip_ratio/low_min": 2.4029219275689684e-05, "clip_ratio/region_mean": 0.0019713879737537354, "epoch": 1.2239720034995625, "grad_norm": 0.2389257699251175, "learning_rate": 1e-06, "loss": -0.0418, "step": 524 }, { "clip_ratio/high_max": 0.002592427917988971, "clip_ratio/high_mean": 0.0009008916185848648, "clip_ratio/low_mean": 0.00042396093704155646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013248525792732835, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3717.0, "completions/mean_length": 969.5078735351562, "completions/mean_terminated_length": 589.9461669921875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.226305045202683, "grad_norm": 0.22978700697422028, "learning_rate": 1e-06, "loss": -0.0676, "num_tokens": 79531948.0, "reward": 0.6071428656578064, "reward_std": 0.151575967669487, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 525 }, { "clip_ratio/high_max": 0.003308030150947161, "clip_ratio/high_mean": 0.0011520420393935638, "clip_ratio/low_mean": 0.0006933599233889254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018454019736964256, "epoch": 1.2286380869058036, "grad_norm": 0.1991288810968399, "learning_rate": 1e-06, "loss": -0.0679, "step": 526 }, { "clip_ratio/high_max": 0.0037966473028063774, "clip_ratio/high_mean": 0.0012030818907078356, "clip_ratio/low_mean": 0.0008858901655912632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002088972025376279, "epoch": 1.2309711286089238, "grad_norm": 0.1985888034105301, "learning_rate": 1e-06, "loss": -0.068, "step": 527 }, { "clip_ratio/high_max": 0.0035153744684066623, "clip_ratio/high_mean": 0.0011972285283263773, "clip_ratio/low_mean": 0.0010277893488819245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002225017909950111, "epoch": 1.2333041703120444, "grad_norm": 0.18501132726669312, "learning_rate": 1e-06, "loss": -0.0681, "step": 528 }, { "clip_ratio/high_max": 0.002251866906590294, "clip_ratio/high_mean": 0.0007997916636668378, "clip_ratio/low_mean": 0.0006696858235955006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014694774727104232, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2754.0, "completions/mean_length": 1112.25341796875, "completions/mean_terminated_length": 605.8733520507812, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 1.2356372120151649, "grad_norm": 0.29438626766204834, "learning_rate": 1e-06, "loss": -0.0569, "num_tokens": 80086919.0, "reward": 0.5390625, "reward_std": 0.16435301303863525, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 529 }, { "clip_ratio/high_max": 0.0027898812768398784, "clip_ratio/high_mean": 0.0010612102978484472, "clip_ratio/low_mean": 0.0008457083604298532, "clip_ratio/low_min": 1.3377568393480033e-05, "clip_ratio/region_mean": 0.0019069186528213322, "epoch": 1.2379702537182853, "grad_norm": 0.22571726143360138, "learning_rate": 1e-06, "loss": -0.0572, "step": 530 }, { "clip_ratio/high_max": 0.0031235070564434864, "clip_ratio/high_mean": 0.0010874585277633741, "clip_ratio/low_mean": 0.0010993516680173343, "clip_ratio/low_min": 1.3377568393480033e-05, "clip_ratio/region_mean": 0.002186810153943952, "epoch": 1.2403032954214057, "grad_norm": 0.18684718012809753, "learning_rate": 1e-06, "loss": -0.0574, "step": 531 }, { "clip_ratio/high_max": 0.002868225252314005, "clip_ratio/high_mean": 0.0010452830683789216, "clip_ratio/low_mean": 0.0012705202316283248, "clip_ratio/low_min": 3.6909448681399226e-05, "clip_ratio/region_mean": 0.0023158032781793736, "epoch": 1.2426363371245261, "grad_norm": 0.20576512813568115, "learning_rate": 1e-06, "loss": -0.0574, "step": 532 }, { "clip_ratio/high_max": 0.0024350592066184618, "clip_ratio/high_mean": 0.0010799655137816444, "clip_ratio/low_mean": 0.0005255854721326614, "clip_ratio/low_min": 1.6657782907714136e-05, "clip_ratio/region_mean": 0.001605550991371274, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3299.0, "completions/mean_length": 976.3772583007812, "completions/mean_terminated_length": 593.2656860351562, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 1.2449693788276466, "grad_norm": 0.3275110423564911, "learning_rate": 1e-06, "loss": -0.0671, "num_tokens": 80641609.0, "reward": 0.6428571939468384, "reward_std": 0.18265356123447418, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 533 }, { "clip_ratio/high_max": 0.0031015579297672957, "clip_ratio/high_mean": 0.0013122145355737302, "clip_ratio/low_mean": 0.000775938459810277, "clip_ratio/low_min": 2.710908665903844e-05, "clip_ratio/region_mean": 0.002088153036311269, "epoch": 1.247302420530767, "grad_norm": 0.281019926071167, "learning_rate": 1e-06, "loss": -0.0673, "step": 534 }, { "clip_ratio/high_max": 0.002716930066526402, "clip_ratio/high_mean": 0.0012175443043815903, "clip_ratio/low_mean": 0.0009726612188387662, "clip_ratio/low_min": 1.6657782907714136e-05, "clip_ratio/region_mean": 0.0021902055596001446, "epoch": 1.2496354622338874, "grad_norm": 0.20063097774982452, "learning_rate": 1e-06, "loss": -0.0675, "step": 535 }, { "clip_ratio/high_max": 0.0030896394382580183, "clip_ratio/high_mean": 0.0012307345277804416, "clip_ratio/low_mean": 0.0011316145337332273, "clip_ratio/low_min": 1.355454332951922e-05, "clip_ratio/region_mean": 0.0023623489978490397, "epoch": 1.2519685039370079, "grad_norm": 0.2279004156589508, "learning_rate": 1e-06, "loss": -0.0675, "step": 536 }, { "clip_ratio/high_max": 0.002269060983962845, "clip_ratio/high_mean": 0.0009541396066197194, "clip_ratio/low_mean": 0.0005999795075695147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015541191351076122, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3890.0, "completions/mean_length": 934.7801513671875, "completions/mean_terminated_length": 594.822021484375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.2543015456401283, "grad_norm": 0.2848651111125946, "learning_rate": 1e-06, "loss": -0.0739, "num_tokens": 81206836.0, "reward": 0.6696428656578064, "reward_std": 0.17649206519126892, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 537 }, { "clip_ratio/high_max": 0.00296455933857942, "clip_ratio/high_mean": 0.0012060076842317358, "clip_ratio/low_mean": 0.0008301700763695408, "clip_ratio/low_min": 1.385194991598837e-05, "clip_ratio/region_mean": 0.0020361777787911706, "epoch": 1.2566345873432487, "grad_norm": 0.3101758360862732, "learning_rate": 1e-06, "loss": -0.0741, "step": 538 }, { "clip_ratio/high_max": 0.0026383453950984403, "clip_ratio/high_mean": 0.0011584382482396904, "clip_ratio/low_mean": 0.0010574414545772015, "clip_ratio/low_min": 3.907471182174049e-05, "clip_ratio/region_mean": 0.0022158796709845774, "epoch": 1.2589676290463692, "grad_norm": 0.21931804716587067, "learning_rate": 1e-06, "loss": -0.0743, "step": 539 }, { "clip_ratio/high_max": 0.002541691101214383, "clip_ratio/high_mean": 0.001116355419071624, "clip_ratio/low_mean": 0.0012633948963411967, "clip_ratio/low_min": 2.770389983197674e-05, "clip_ratio/region_mean": 0.0023797503308742307, "epoch": 1.2613006707494896, "grad_norm": 0.3209487199783325, "learning_rate": 1e-06, "loss": -0.0744, "step": 540 }, { "clip_ratio/high_max": 0.0023388528879877413, "clip_ratio/high_mean": 0.0008931090860642144, "clip_ratio/low_mean": 0.0006677318378933705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001560840908496175, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3303.0, "completions/mean_length": 992.5625610351562, "completions/mean_terminated_length": 598.289306640625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.26363371245261, "grad_norm": 0.24479156732559204, "learning_rate": 1e-06, "loss": -0.0438, "num_tokens": 81774076.0, "reward": 0.5770089626312256, "reward_std": 0.1669466197490692, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099617958069, "step": 541 }, { "clip_ratio/high_max": 0.002695808510907227, "clip_ratio/high_mean": 0.0011374612786312355, "clip_ratio/low_mean": 0.0007252048162627034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018626660930749495, "epoch": 1.2659667541557305, "grad_norm": 0.22449366748332977, "learning_rate": 1e-06, "loss": -0.0439, "step": 542 }, { "clip_ratio/high_max": 0.0026009647990576923, "clip_ratio/high_mean": 0.0010876012001972413, "clip_ratio/low_mean": 0.0010571295370027656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021447307444759645, "epoch": 1.268299795858851, "grad_norm": 0.1874256134033203, "learning_rate": 1e-06, "loss": -0.0441, "step": 543 }, { "clip_ratio/high_max": 0.002826059189828811, "clip_ratio/high_mean": 0.0011576665710890666, "clip_ratio/low_mean": 0.0011849075144709786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002342574080103077, "epoch": 1.2706328375619713, "grad_norm": 0.17332985997200012, "learning_rate": 1e-06, "loss": -0.0442, "step": 544 }, { "clip_ratio/high_max": 0.0022264186009124387, "clip_ratio/high_mean": 0.001017978900563321, "clip_ratio/low_mean": 0.000610701295954641, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016286801983369514, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 1057.01904296875, "completions/mean_terminated_length": 622.8787841796875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.272965879265092, "grad_norm": 0.3083045184612274, "learning_rate": 1e-06, "loss": -0.05, "num_tokens": 82348637.0, "reward": 0.5770089626312256, "reward_std": 0.18761083483695984, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 545 }, { "clip_ratio/high_max": 0.0033035823944373988, "clip_ratio/high_mean": 0.0012792956767953, "clip_ratio/low_mean": 0.0010433476818434428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023226433695526794, "epoch": 1.2752989209682122, "grad_norm": 0.2574704885482788, "learning_rate": 1e-06, "loss": -0.0503, "step": 546 }, { "clip_ratio/high_max": 0.0032511619647266343, "clip_ratio/high_mean": 0.0012776143485098146, "clip_ratio/low_mean": 0.0011831686570076272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002460783020069357, "epoch": 1.2776319626713328, "grad_norm": 0.23422737419605255, "learning_rate": 1e-06, "loss": -0.0504, "step": 547 }, { "clip_ratio/high_max": 0.002982414713187609, "clip_ratio/high_mean": 0.0011638829091680236, "clip_ratio/low_mean": 0.001405524933943525, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002569407799455803, "epoch": 1.2799650043744533, "grad_norm": 0.2075558751821518, "learning_rate": 1e-06, "loss": -0.0506, "step": 548 }, { "clip_ratio/high_max": 0.0018218839504697826, "clip_ratio/high_mean": 0.0006480422534878016, "clip_ratio/low_mean": 0.0006002505042488337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012482927522796672, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3967.0, "completions/mean_length": 1145.2366943359375, "completions/mean_terminated_length": 644.454345703125, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 1.2822980460775737, "grad_norm": 0.23727191984653473, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 82934601.0, "reward": 0.5502232313156128, "reward_std": 0.13722196221351624, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 549 }, { "clip_ratio/high_max": 0.0020135488175583305, "clip_ratio/high_mean": 0.000720536571861885, "clip_ratio/low_mean": 0.000795816269601346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015163528587436303, "epoch": 1.2846310877806941, "grad_norm": 0.20960785448551178, "learning_rate": 1e-06, "loss": -0.0328, "step": 550 }, { "clip_ratio/high_max": 0.0021363516370911384, "clip_ratio/high_mean": 0.0007712140754847496, "clip_ratio/low_mean": 0.001017558195599122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017887722860905342, "epoch": 1.2869641294838146, "grad_norm": 0.1890256106853485, "learning_rate": 1e-06, "loss": -0.0329, "step": 551 }, { "clip_ratio/high_max": 0.002385011419391958, "clip_ratio/high_mean": 0.0007658743115825928, "clip_ratio/low_mean": 0.0012119299499318004, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019778042478719726, "epoch": 1.289297171186935, "grad_norm": 0.16167958080768585, "learning_rate": 1e-06, "loss": -0.033, "step": 552 }, { "clip_ratio/high_max": 0.0025781944350455888, "clip_ratio/high_mean": 0.001074661959137302, "clip_ratio/low_mean": 0.0006483473480329849, "clip_ratio/low_min": 1.0751763511507306e-05, "clip_ratio/region_mean": 0.0017230093289981596, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 1220.677490234375, "completions/mean_terminated_length": 665.52197265625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.2916302128900554, "grad_norm": 0.2890176475048065, "learning_rate": 1e-06, "loss": -0.0684, "num_tokens": 83530192.0, "reward": 0.515625, "reward_std": 0.19622044265270233, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 553 }, { "clip_ratio/high_max": 0.0029420962018775754, "clip_ratio/high_mean": 0.0012198284202895593, "clip_ratio/low_mean": 0.000763745676522376, "clip_ratio/low_min": 2.1606361769954674e-05, "clip_ratio/region_mean": 0.0019835740604321472, "epoch": 1.2939632545931758, "grad_norm": 0.24971313774585724, "learning_rate": 1e-06, "loss": -0.0685, "step": 554 }, { "clip_ratio/high_max": 0.0034426571655785665, "clip_ratio/high_mean": 0.001382934980938444, "clip_ratio/low_mean": 0.001083816277969163, "clip_ratio/low_min": 1.0751763511507306e-05, "clip_ratio/region_mean": 0.0024667512843734585, "epoch": 1.2962962962962963, "grad_norm": 0.21363192796707153, "learning_rate": 1e-06, "loss": -0.0688, "step": 555 }, { "clip_ratio/high_max": 0.003283892227045726, "clip_ratio/high_mean": 0.0013199301065469626, "clip_ratio/low_mean": 0.0012499797667260282, "clip_ratio/low_min": 2.1244051822577603e-05, "clip_ratio/region_mean": 0.0025699098987388425, "epoch": 1.2986293379994167, "grad_norm": 0.22296246886253357, "learning_rate": 1e-06, "loss": -0.0689, "step": 556 }, { "clip_ratio/high_max": 0.001826815896492917, "clip_ratio/high_mean": 0.0006240852071641712, "clip_ratio/low_mean": 0.0005783656320090813, "clip_ratio/low_min": 9.656983820605092e-06, "clip_ratio/region_mean": 0.0012024508323520422, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 1182.9554443359375, "completions/mean_terminated_length": 661.6737060546875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 1.3009623797025371, "grad_norm": 0.2738368511199951, "learning_rate": 1e-06, "loss": -0.054, "num_tokens": 84126120.0, "reward": 0.5089285969734192, "reward_std": 0.1451537311077118, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 557 }, { "clip_ratio/high_max": 0.0022162531822687015, "clip_ratio/high_mean": 0.0007980015361681581, "clip_ratio/low_mean": 0.0007888012933108257, "clip_ratio/low_min": 1.0867674973269459e-05, "clip_ratio/region_mean": 0.0015868028131080791, "epoch": 1.3032954214056576, "grad_norm": 0.2366650104522705, "learning_rate": 1e-06, "loss": -0.0543, "step": 558 }, { "clip_ratio/high_max": 0.0025047057497431524, "clip_ratio/high_mean": 0.0008618533920525806, "clip_ratio/low_mean": 0.001016271615299047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001878125018265564, "epoch": 1.305628463108778, "grad_norm": 0.21774974465370178, "learning_rate": 1e-06, "loss": -0.0544, "step": 559 }, { "clip_ratio/high_max": 0.0023650196781090926, "clip_ratio/high_mean": 0.0008386210593016585, "clip_ratio/low_mean": 0.0012069545991835184, "clip_ratio/low_min": 6.15359385847114e-05, "clip_ratio/region_mean": 0.0020455756784940604, "epoch": 1.3079615048118984, "grad_norm": 0.18776483833789825, "learning_rate": 1e-06, "loss": -0.0545, "step": 560 }, { "clip_ratio/high_max": 0.002210908722190652, "clip_ratio/high_mean": 0.000765536697144853, "clip_ratio/low_mean": 0.000620098414401582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013856350979040144, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 1280.943115234375, "completions/mean_terminated_length": 705.8239135742188, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 1.3102945465150189, "grad_norm": 0.24883843958377838, "learning_rate": 1e-06, "loss": -0.0668, "num_tokens": 84745181.0, "reward": 0.5167410969734192, "reward_std": 0.15518662333488464, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 561 }, { "clip_ratio/high_max": 0.0027370128518668935, "clip_ratio/high_mean": 0.0010297183680449962, "clip_ratio/low_mean": 0.0008404065083595924, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018701249246078078, "epoch": 1.3126275882181395, "grad_norm": 0.19404339790344238, "learning_rate": 1e-06, "loss": -0.067, "step": 562 }, { "clip_ratio/high_max": 0.002672463200724451, "clip_ratio/high_mean": 0.0009291456617575022, "clip_ratio/low_mean": 0.0010373450859333389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001966490781342145, "epoch": 1.3149606299212597, "grad_norm": 0.2075706124305725, "learning_rate": 1e-06, "loss": -0.0672, "step": 563 }, { "clip_ratio/high_max": 0.0024174200952984393, "clip_ratio/high_mean": 0.0009365485079797509, "clip_ratio/low_mean": 0.0011528040340635926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002089352550683543, "epoch": 1.3172936716243804, "grad_norm": 0.173910990357399, "learning_rate": 1e-06, "loss": -0.0672, "step": 564 }, { "clip_ratio/high_max": 0.002502191076928284, "clip_ratio/high_mean": 0.0008761862118262798, "clip_ratio/low_mean": 0.0008554907271900447, "clip_ratio/low_min": 1.070755570253823e-05, "clip_ratio/region_mean": 0.0017316769881290384, "completions/clipped_ratio": 0.1774553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 1279.318115234375, "completions/mean_terminated_length": 671.6485595703125, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 1.3196267133275008, "grad_norm": 0.2694673240184784, "learning_rate": 1e-06, "loss": -0.0587, "num_tokens": 85333594.0, "reward": 0.4955357313156128, "reward_std": 0.16450457274913788, "rewards/verify_math_reward/mean": 0.4955357015132904, "rewards/verify_math_reward/std": 0.500259280204773, "step": 565 }, { "clip_ratio/high_max": 0.0036288581541157328, "clip_ratio/high_mean": 0.0013075614733679686, "clip_ratio/low_mean": 0.001007825490887626, "clip_ratio/low_min": 2.141511140507646e-05, "clip_ratio/region_mean": 0.0023153869624366052, "epoch": 1.3219597550306212, "grad_norm": 0.27903032302856445, "learning_rate": 1e-06, "loss": -0.0589, "step": 566 }, { "clip_ratio/high_max": 0.0031396789054269902, "clip_ratio/high_mean": 0.0010936912440229207, "clip_ratio/low_mean": 0.0011650597607513191, "clip_ratio/low_min": 1.2437811165000312e-05, "clip_ratio/region_mean": 0.002258751028421102, "epoch": 1.3242927967337417, "grad_norm": 0.22430910170078278, "learning_rate": 1e-06, "loss": -0.0592, "step": 567 }, { "clip_ratio/high_max": 0.0029859956121072173, "clip_ratio/high_mean": 0.0010755592793429969, "clip_ratio/low_mean": 0.0014766222884645686, "clip_ratio/low_min": 3.731343167601153e-05, "clip_ratio/region_mean": 0.0025521815186948515, "epoch": 1.326625838436862, "grad_norm": 0.20644904673099518, "learning_rate": 1e-06, "loss": -0.0593, "step": 568 }, { "clip_ratio/high_max": 0.0024077963025774807, "clip_ratio/high_mean": 0.0009524892739136703, "clip_ratio/low_mean": 0.0007456240709871054, "clip_ratio/low_min": 2.887827122322051e-05, "clip_ratio/region_mean": 0.0016981133230729029, "completions/clipped_ratio": 0.1741071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 1243.03466796875, "completions/mean_terminated_length": 641.5986938476562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.3289588801399825, "grad_norm": 0.3015615940093994, "learning_rate": 1e-06, "loss": -0.0808, "num_tokens": 85899945.0, "reward": 0.5011160969734192, "reward_std": 0.19895894825458527, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 569 }, { "clip_ratio/high_max": 0.003500665996398311, "clip_ratio/high_mean": 0.0012464672872738447, "clip_ratio/low_mean": 0.0010257575977448141, "clip_ratio/low_min": 0.00012356816660030745, "clip_ratio/region_mean": 0.002272224875923712, "epoch": 1.331291921843103, "grad_norm": 0.24528682231903076, "learning_rate": 1e-06, "loss": -0.0811, "step": 570 }, { "clip_ratio/high_max": 0.0038307572322082706, "clip_ratio/high_mean": 0.0013789985459879972, "clip_ratio/low_mean": 0.0012182924365333747, "clip_ratio/low_min": 0.00011780763452406973, "clip_ratio/region_mean": 0.002597291022539139, "epoch": 1.3336249635462234, "grad_norm": 0.208794504404068, "learning_rate": 1e-06, "loss": -0.0814, "step": 571 }, { "clip_ratio/high_max": 0.0031483758939430118, "clip_ratio/high_mean": 0.0011578826415643562, "clip_ratio/low_mean": 0.0014242229008232243, "clip_ratio/low_min": 0.00011548066322575323, "clip_ratio/region_mean": 0.002582105516921729, "epoch": 1.3359580052493438, "grad_norm": 0.2046995311975479, "learning_rate": 1e-06, "loss": -0.0814, "step": 572 }, { "clip_ratio/high_max": 0.0023555047664558515, "clip_ratio/high_mean": 0.0008531126677553402, "clip_ratio/low_mean": 0.0005902969119233603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014434096083277836, "completions/clipped_ratio": 0.1752232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 1285.040283203125, "completions/mean_terminated_length": 687.8538208007812, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 1.3382910469524643, "grad_norm": 0.25017184019088745, "learning_rate": 1e-06, "loss": -0.0753, "num_tokens": 86504485.0, "reward": 0.5256696939468384, "reward_std": 0.17757472395896912, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195137500763, "step": 573 }, { "clip_ratio/high_max": 0.0028082246353733353, "clip_ratio/high_mean": 0.0010514356654311996, "clip_ratio/low_mean": 0.0007836565755496849, "clip_ratio/low_min": 4.149826600041706e-05, "clip_ratio/region_mean": 0.0018350922473473474, "epoch": 1.3406240886555847, "grad_norm": 0.22260355949401855, "learning_rate": 1e-06, "loss": -0.0755, "step": 574 }, { "clip_ratio/high_max": 0.0034009277733275667, "clip_ratio/high_mean": 0.0011136180037283339, "clip_ratio/low_mean": 0.001009279998470447, "clip_ratio/low_min": 5.6302465964108706e-05, "clip_ratio/region_mean": 0.0021228979603620246, "epoch": 1.3429571303587051, "grad_norm": 0.2014068365097046, "learning_rate": 1e-06, "loss": -0.0758, "step": 575 }, { "clip_ratio/high_max": 0.0031681032451160718, "clip_ratio/high_mean": 0.0011202151526958914, "clip_ratio/low_mean": 0.0011742266851797467, "clip_ratio/low_min": 6.622609544137958e-05, "clip_ratio/region_mean": 0.0022944418524275534, "epoch": 1.3452901720618256, "grad_norm": 0.2062731385231018, "learning_rate": 1e-06, "loss": -0.0758, "step": 576 }, { "clip_ratio/high_max": 0.00190445312182419, "clip_ratio/high_mean": 0.0007755427886877442, "clip_ratio/low_mean": 0.0005309752777975518, "clip_ratio/low_min": 1.972231075342279e-05, "clip_ratio/region_mean": 0.0013065180792182218, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3200.0, "completions/mean_length": 1232.1640625, "completions/mean_terminated_length": 580.9356079101562, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.347623213764946, "grad_norm": 0.30050161480903625, "learning_rate": 1e-06, "loss": -0.0672, "num_tokens": 87015224.0, "reward": 0.559151828289032, "reward_std": 0.15045617520809174, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 577 }, { "clip_ratio/high_max": 0.002414862574369181, "clip_ratio/high_mean": 0.0009709475780255161, "clip_ratio/low_mean": 0.0007054359193716664, "clip_ratio/low_min": 1.972231075342279e-05, "clip_ratio/region_mean": 0.001676383486483246, "epoch": 1.3499562554680664, "grad_norm": 0.23338362574577332, "learning_rate": 1e-06, "loss": -0.0674, "step": 578 }, { "clip_ratio/high_max": 0.0023903432229417376, "clip_ratio/high_mean": 0.0009181600580632221, "clip_ratio/low_mean": 0.0009727856850076932, "clip_ratio/low_min": 3.944462150684558e-05, "clip_ratio/region_mean": 0.0018909457576228306, "epoch": 1.352289297171187, "grad_norm": 0.21429312229156494, "learning_rate": 1e-06, "loss": -0.0675, "step": 579 }, { "clip_ratio/high_max": 0.0023471144886570983, "clip_ratio/high_mean": 0.0009947415128408466, "clip_ratio/low_mean": 0.0010963070253637852, "clip_ratio/low_min": 3.944462150684558e-05, "clip_ratio/region_mean": 0.0020910485764034092, "epoch": 1.3546223388743073, "grad_norm": 0.19566044211387634, "learning_rate": 1e-06, "loss": -0.0676, "step": 580 }, { "clip_ratio/high_max": 0.0024408697499893606, "clip_ratio/high_mean": 0.000832719366371748, "clip_ratio/low_mean": 0.0005201030976422771, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013528224553738255, "completions/clipped_ratio": 0.2087053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 1369.0457763671875, "completions/mean_terminated_length": 649.8067626953125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.356955380577428, "grad_norm": 0.2526213526725769, "learning_rate": 1e-06, "loss": -0.0483, "num_tokens": 87579321.0, "reward": 0.535714328289032, "reward_std": 0.1345217078924179, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 581 }, { "clip_ratio/high_max": 0.0030322665261337534, "clip_ratio/high_mean": 0.0010206833885604283, "clip_ratio/low_mean": 0.0007519806695199804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017726640435284935, "epoch": 1.3592884222805481, "grad_norm": 0.25519487261772156, "learning_rate": 1e-06, "loss": -0.0485, "step": 582 }, { "clip_ratio/high_max": 0.003515000549668912, "clip_ratio/high_mean": 0.0011885368476214353, "clip_ratio/low_mean": 0.0009024976052387501, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002091034493787447, "epoch": 1.3616214639836688, "grad_norm": 0.4738200902938843, "learning_rate": 1e-06, "loss": -0.0487, "step": 583 }, { "clip_ratio/high_max": 0.003024574962182669, "clip_ratio/high_mean": 0.0009989466548177006, "clip_ratio/low_mean": 0.0010811063275468769, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002080053040117491, "epoch": 1.3639545056867892, "grad_norm": 0.2041773796081543, "learning_rate": 1e-06, "loss": -0.0488, "step": 584 }, { "clip_ratio/high_max": 0.002207355821155943, "clip_ratio/high_mean": 0.0008487261347909225, "clip_ratio/low_mean": 0.00045651231687315885, "clip_ratio/low_min": 1.7443482647649944e-05, "clip_ratio/region_mean": 0.0013052384383627214, "completions/clipped_ratio": 0.2042410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 1383.12841796875, "completions/mean_terminated_length": 686.8372802734375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 1.3662875473899097, "grad_norm": 0.28928208351135254, "learning_rate": 1e-06, "loss": -0.0712, "num_tokens": 88156644.0, "reward": 0.5055803656578064, "reward_std": 0.16517673432826996, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 585 }, { "clip_ratio/high_max": 0.0027591592152020894, "clip_ratio/high_mean": 0.0010814143024617806, "clip_ratio/low_mean": 0.0007400381391562405, "clip_ratio/low_min": 1.3213531019573566e-05, "clip_ratio/region_mean": 0.0018214524934592191, "epoch": 1.36862058909303, "grad_norm": 0.23145802319049835, "learning_rate": 1e-06, "loss": -0.0714, "step": 586 }, { "clip_ratio/high_max": 0.002570676511822967, "clip_ratio/high_mean": 0.0010192344198003411, "clip_ratio/low_mean": 0.0008793568467808655, "clip_ratio/low_min": 2.402613972662948e-05, "clip_ratio/region_mean": 0.0018985912993230158, "epoch": 1.3709536307961505, "grad_norm": 0.20198185741901398, "learning_rate": 1e-06, "loss": -0.0716, "step": 587 }, { "clip_ratio/high_max": 0.002756901980319526, "clip_ratio/high_mean": 0.0010476693350938149, "clip_ratio/low_mean": 0.0010282289204042172, "clip_ratio/low_min": 5.233044794294983e-05, "clip_ratio/region_mean": 0.0020758982536790427, "epoch": 1.373286672499271, "grad_norm": 0.19303227961063385, "learning_rate": 1e-06, "loss": -0.0717, "step": 588 }, { "clip_ratio/high_max": 0.0030537584316334687, "clip_ratio/high_mean": 0.0011682229196594562, "clip_ratio/low_mean": 0.0005520252943824744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001720248234050814, "completions/clipped_ratio": 0.21875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3188.0, "completions/mean_length": 1431.64404296875, "completions/mean_terminated_length": 685.624267578125, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 1.3756197142023914, "grad_norm": 0.299399733543396, "learning_rate": 1e-06, "loss": -0.1017, "num_tokens": 88720029.0, "reward": 0.53125, "reward_std": 0.19854412972927094, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 589 }, { "clip_ratio/high_max": 0.0032559526662225835, "clip_ratio/high_mean": 0.0012779856951965485, "clip_ratio/low_mean": 0.0008215541784011293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020995398444938473, "epoch": 1.3779527559055118, "grad_norm": 0.254035621881485, "learning_rate": 1e-06, "loss": -0.102, "step": 590 }, { "clip_ratio/high_max": 0.003467716960585676, "clip_ratio/high_mean": 0.0013532105476770084, "clip_ratio/low_mean": 0.001074977066309657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002428187603072729, "epoch": 1.3802857976086322, "grad_norm": 0.22269730269908905, "learning_rate": 1e-06, "loss": -0.1022, "step": 591 }, { "clip_ratio/high_max": 0.0038947790453676134, "clip_ratio/high_mean": 0.0014983424262027256, "clip_ratio/low_mean": 0.0012840630570281064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027824054850498214, "epoch": 1.3826188393117527, "grad_norm": 0.22764192521572113, "learning_rate": 1e-06, "loss": -0.1023, "step": 592 }, { "clip_ratio/high_max": 0.0021997439180267975, "clip_ratio/high_mean": 0.0010206915576418396, "clip_ratio/low_mean": 0.00040406626385447453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001424757832864998, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3147.0, "completions/mean_length": 1177.2935791015625, "completions/mean_terminated_length": 632.2106323242188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.384951881014873, "grad_norm": 0.3221670985221863, "learning_rate": 1e-06, "loss": -0.0642, "num_tokens": 89281972.0, "reward": 0.6116071939468384, "reward_std": 0.163072407245636, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 593 }, { "clip_ratio/high_max": 0.003074386069783941, "clip_ratio/high_mean": 0.0013346814266697038, "clip_ratio/low_mean": 0.0006285099052547594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001963191360118799, "epoch": 1.3872849227179935, "grad_norm": 0.26880836486816406, "learning_rate": 1e-06, "loss": -0.0646, "step": 594 }, { "clip_ratio/high_max": 0.0026226171030430123, "clip_ratio/high_mean": 0.0012232904336997308, "clip_ratio/low_mean": 0.0008326363977175788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002055926750472281, "epoch": 1.389617964421114, "grad_norm": 0.2298322468996048, "learning_rate": 1e-06, "loss": -0.0647, "step": 595 }, { "clip_ratio/high_max": 0.0031332553407992236, "clip_ratio/high_mean": 0.0012834487861255184, "clip_ratio/low_mean": 0.0010289407300660969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023123895152821206, "epoch": 1.3919510061242344, "grad_norm": 0.17754845321178436, "learning_rate": 1e-06, "loss": -0.0648, "step": 596 }, { "clip_ratio/high_max": 0.0015846236638026312, "clip_ratio/high_mean": 0.0005263043115064647, "clip_ratio/low_mean": 0.0002776564401756332, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008039607509999769, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3086.0, "completions/mean_length": 1184.8817138671875, "completions/mean_terminated_length": 641.2158813476562, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 1.3942840478273548, "grad_norm": 0.17424309253692627, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 89862346.0, "reward": 0.5926339626312256, "reward_std": 0.10697808116674423, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 597 }, { "clip_ratio/high_max": 0.0016562970013183076, "clip_ratio/high_mean": 0.0005918571487200097, "clip_ratio/low_mean": 0.00033543918425493757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000927296341615147, "epoch": 1.3966170895304755, "grad_norm": 0.15606385469436646, "learning_rate": 1e-06, "loss": -0.0551, "step": 598 }, { "clip_ratio/high_max": 0.0018177629535784945, "clip_ratio/high_mean": 0.0006014393620716874, "clip_ratio/low_mean": 0.000457512957950712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010589522898953874, "epoch": 1.3989501312335957, "grad_norm": 0.15585005283355713, "learning_rate": 1e-06, "loss": -0.0551, "step": 599 }, { "clip_ratio/high_max": 0.001933376139277243, "clip_ratio/high_mean": 0.0006321645703337708, "clip_ratio/low_mean": 0.0005467807463901408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011789453419623896, "epoch": 1.4012831729367163, "grad_norm": 0.14662864804267883, "learning_rate": 1e-06, "loss": -0.0552, "step": 600 }, { "clip_ratio/high_max": 0.0021389451321738306, "clip_ratio/high_mean": 0.0008527110221621115, "clip_ratio/low_mean": 0.0005508000540430658, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001403511079843156, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 1268.9832763671875, "completions/mean_terminated_length": 626.12744140625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.4036162146398368, "grad_norm": 0.3296069800853729, "learning_rate": 1e-06, "loss": -0.0579, "num_tokens": 90416619.0, "reward": 0.5424107313156128, "reward_std": 0.15852880477905273, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 601 }, { "clip_ratio/high_max": 0.0026905527847702615, "clip_ratio/high_mean": 0.001033009248203598, "clip_ratio/low_mean": 0.0008065595188782027, "clip_ratio/low_min": 1.9512955987011082e-05, "clip_ratio/region_mean": 0.0018395687729935162, "epoch": 1.4059492563429572, "grad_norm": 0.30061817169189453, "learning_rate": 1e-06, "loss": -0.0581, "step": 602 }, { "clip_ratio/high_max": 0.002843020934960805, "clip_ratio/high_mean": 0.0009820305567700416, "clip_ratio/low_mean": 0.0010391084992988908, "clip_ratio/low_min": 5.8049536164617166e-05, "clip_ratio/region_mean": 0.002021139021962881, "epoch": 1.4082822980460776, "grad_norm": 0.37664586305618286, "learning_rate": 1e-06, "loss": -0.0584, "step": 603 }, { "clip_ratio/high_max": 0.002560520537372213, "clip_ratio/high_mean": 0.0009429810361325508, "clip_ratio/low_mean": 0.001355129446437786, "clip_ratio/low_min": 3.9025911974022165e-05, "clip_ratio/region_mean": 0.0022981104848440737, "epoch": 1.410615339749198, "grad_norm": 0.22192707657814026, "learning_rate": 1e-06, "loss": -0.0584, "step": 604 }, { "clip_ratio/high_max": 0.0021960718695481773, "clip_ratio/high_mean": 0.0009144201030721888, "clip_ratio/low_mean": 0.0006570868190465262, "clip_ratio/low_min": 3.0317729397211224e-05, "clip_ratio/region_mean": 0.001571506931213662, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 1080.6138916015625, "completions/mean_terminated_length": 618.7979736328125, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 1.4129483814523185, "grad_norm": 0.32067176699638367, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 90981905.0, "reward": 0.6473214626312256, "reward_std": 0.16586002707481384, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807061672210693, "step": 605 }, { "clip_ratio/high_max": 0.0025493590328551363, "clip_ratio/high_mean": 0.0010450735744598205, "clip_ratio/low_mean": 0.0009328416072094114, "clip_ratio/low_min": 6.464625766966492e-05, "clip_ratio/region_mean": 0.0019779151625698432, "epoch": 1.415281423155439, "grad_norm": 0.23323972523212433, "learning_rate": 1e-06, "loss": -0.0521, "step": 606 }, { "clip_ratio/high_max": 0.002687867498025298, "clip_ratio/high_mean": 0.0010876679752982454, "clip_ratio/low_mean": 0.0010807789094542386, "clip_ratio/low_min": 4.5476594095816836e-05, "clip_ratio/region_mean": 0.002168446888390463, "epoch": 1.4176144648585594, "grad_norm": 0.20053008198738098, "learning_rate": 1e-06, "loss": -0.0523, "step": 607 }, { "clip_ratio/high_max": 0.0026580548619676847, "clip_ratio/high_mean": 0.001002402326776064, "clip_ratio/low_mean": 0.0013172924864193192, "clip_ratio/low_min": 0.00010611205652821809, "clip_ratio/region_mean": 0.0023196948823169805, "epoch": 1.4199475065616798, "grad_norm": 0.20511440932750702, "learning_rate": 1e-06, "loss": -0.0523, "step": 608 }, { "clip_ratio/high_max": 0.0022464193098130636, "clip_ratio/high_mean": 0.0008555043514206773, "clip_ratio/low_mean": 0.0005546764232349233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014101807100814767, "completions/clipped_ratio": 0.1529017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3897.0, "completions/mean_length": 1150.9732666015625, "completions/mean_terminated_length": 619.3939208984375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.4222805482648002, "grad_norm": 0.3018054962158203, "learning_rate": 1e-06, "loss": -0.0708, "num_tokens": 91541249.0, "reward": 0.5613839626312256, "reward_std": 0.16461403667926788, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 609 }, { "clip_ratio/high_max": 0.002809393459756393, "clip_ratio/high_mean": 0.001057904966728529, "clip_ratio/low_mean": 0.0007229856068988738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017808905977290124, "epoch": 1.4246135899679206, "grad_norm": 0.2314707636833191, "learning_rate": 1e-06, "loss": -0.071, "step": 610 }, { "clip_ratio/high_max": 0.0028084185032639652, "clip_ratio/high_mean": 0.0010672877251636237, "clip_ratio/low_mean": 0.0008877732725522947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001955060964974109, "epoch": 1.426946631671041, "grad_norm": 0.4472660720348358, "learning_rate": 1e-06, "loss": -0.0711, "step": 611 }, { "clip_ratio/high_max": 0.00276724090508651, "clip_ratio/high_mean": 0.0010477610740053933, "clip_ratio/low_mean": 0.0010973095450026449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021450706335599534, "epoch": 1.4292796733741615, "grad_norm": 0.2138577103614807, "learning_rate": 1e-06, "loss": -0.0712, "step": 612 }, { "clip_ratio/high_max": 0.002117222800734453, "clip_ratio/high_mean": 0.0008917890409065876, "clip_ratio/low_mean": 0.0007474229569197632, "clip_ratio/low_min": 1.2669775060203392e-05, "clip_ratio/region_mean": 0.0016392120087402873, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3247.0, "completions/mean_length": 1161.48779296875, "completions/mean_terminated_length": 658.9765014648438, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.431612715077282, "grad_norm": 0.2706722617149353, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 92127894.0, "reward": 0.5803571939468384, "reward_std": 0.1737132966518402, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 613 }, { "clip_ratio/high_max": 0.0027357905491953716, "clip_ratio/high_mean": 0.001020476996927755, "clip_ratio/low_mean": 0.0009552073897793889, "clip_ratio/low_min": 1.2669775060203392e-05, "clip_ratio/region_mean": 0.001975684383069165, "epoch": 1.4339457567804024, "grad_norm": 0.2501928210258484, "learning_rate": 1e-06, "loss": -0.0492, "step": 614 }, { "clip_ratio/high_max": 0.0029026706470176578, "clip_ratio/high_mean": 0.0010749668326752726, "clip_ratio/low_mean": 0.0011807225237134844, "clip_ratio/low_min": 2.5339550120406784e-05, "clip_ratio/region_mean": 0.002255689279991202, "epoch": 1.436278798483523, "grad_norm": 0.2090653032064438, "learning_rate": 1e-06, "loss": -0.0495, "step": 615 }, { "clip_ratio/high_max": 0.002555416082032025, "clip_ratio/high_mean": 0.0009618200056138448, "clip_ratio/low_mean": 0.0014350209457916208, "clip_ratio/low_min": 2.5572831873432733e-05, "clip_ratio/region_mean": 0.002396840944129508, "epoch": 1.4386118401866432, "grad_norm": 0.2010267674922943, "learning_rate": 1e-06, "loss": -0.0495, "step": 616 }, { "clip_ratio/high_max": 0.0022100016140029766, "clip_ratio/high_mean": 0.0008261815728474176, "clip_ratio/low_mean": 0.00046243821452662814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012886197873740457, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 1030.7645263671875, "completions/mean_terminated_length": 597.3363037109375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 1.4409448818897639, "grad_norm": 0.30851060152053833, "learning_rate": 1e-06, "loss": -0.0658, "num_tokens": 92681131.0, "reward": 0.6428571939468384, "reward_std": 0.1633780300617218, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 617 }, { "clip_ratio/high_max": 0.002810432379192207, "clip_ratio/high_mean": 0.0011620204331848072, "clip_ratio/low_mean": 0.0006768962748537888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018389166871202178, "epoch": 1.443277923592884, "grad_norm": 0.2598080039024353, "learning_rate": 1e-06, "loss": -0.0661, "step": 618 }, { "clip_ratio/high_max": 0.002908035930886399, "clip_ratio/high_mean": 0.0011905909195775166, "clip_ratio/low_mean": 0.0008262346800620435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020168255796306767, "epoch": 1.4456109652960047, "grad_norm": 0.19690977036952972, "learning_rate": 1e-06, "loss": -0.0663, "step": 619 }, { "clip_ratio/high_max": 0.0025863999107968993, "clip_ratio/high_mean": 0.0010566708187980112, "clip_ratio/low_mean": 0.0009493513525740127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002006022143177688, "epoch": 1.4479440069991252, "grad_norm": 0.23134976625442505, "learning_rate": 1e-06, "loss": -0.0663, "step": 620 }, { "clip_ratio/high_max": 0.002198130972828949, "clip_ratio/high_mean": 0.0007040815144137014, "clip_ratio/low_mean": 0.000685114953739685, "clip_ratio/low_min": 1.1968594662903342e-05, "clip_ratio/region_mean": 0.0013891964590584394, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 1098.3035888671875, "completions/mean_terminated_length": 630.276123046875, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 1.4502770487022456, "grad_norm": 0.3217826187610626, "learning_rate": 1e-06, "loss": -0.0273, "num_tokens": 93258899.0, "reward": 0.5680803656578064, "reward_std": 0.1669451892375946, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 621 }, { "clip_ratio/high_max": 0.002298268344020471, "clip_ratio/high_mean": 0.0008164960509020602, "clip_ratio/low_mean": 0.0009055617429112317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017220578229171224, "epoch": 1.452610090405366, "grad_norm": 0.21622058749198914, "learning_rate": 1e-06, "loss": -0.0276, "step": 622 }, { "clip_ratio/high_max": 0.0024621934062452056, "clip_ratio/high_mean": 0.0007997809279913781, "clip_ratio/low_mean": 0.0011010635334969265, "clip_ratio/low_min": 1.1968594662903342e-05, "clip_ratio/region_mean": 0.001900844436022453, "epoch": 1.4549431321084865, "grad_norm": 0.20084120333194733, "learning_rate": 1e-06, "loss": -0.0278, "step": 623 }, { "clip_ratio/high_max": 0.0023159858101280406, "clip_ratio/high_mean": 0.0008225301498896442, "clip_ratio/low_mean": 0.0012968940864084288, "clip_ratio/low_min": 1.1968594662903342e-05, "clip_ratio/region_mean": 0.0021194242290221155, "epoch": 1.457276173811607, "grad_norm": 0.1809324324131012, "learning_rate": 1e-06, "loss": -0.0279, "step": 624 }, { "clip_ratio/high_max": 0.0020966012234566733, "clip_ratio/high_mean": 0.0008217469148803502, "clip_ratio/low_mean": 0.0005623789793389733, "clip_ratio/low_min": 1.354573032585904e-05, "clip_ratio/region_mean": 0.0013841258842148818, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3344.0, "completions/mean_length": 1279.501220703125, "completions/mean_terminated_length": 708.6403198242188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 1.4596092155147273, "grad_norm": 0.2877887785434723, "learning_rate": 1e-06, "loss": -0.0728, "num_tokens": 93878340.0, "reward": 0.5446428656578064, "reward_std": 0.16566601395606995, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 625 }, { "clip_ratio/high_max": 0.00275848429009784, "clip_ratio/high_mean": 0.0010338401443732437, "clip_ratio/low_mean": 0.0008302076239488088, "clip_ratio/low_min": 3.299023592262529e-05, "clip_ratio/region_mean": 0.0018640477137523703, "epoch": 1.4619422572178478, "grad_norm": 0.20693060755729675, "learning_rate": 1e-06, "loss": -0.0731, "step": 626 }, { "clip_ratio/high_max": 0.0028259688333491795, "clip_ratio/high_mean": 0.001058114408806432, "clip_ratio/low_mean": 0.0009656888905738015, "clip_ratio/low_min": 5.362869706004858e-05, "clip_ratio/region_mean": 0.0020238032666384242, "epoch": 1.4642752989209682, "grad_norm": 0.1813107430934906, "learning_rate": 1e-06, "loss": -0.0733, "step": 627 }, { "clip_ratio/high_max": 0.0028315739036770537, "clip_ratio/high_mean": 0.0009843532243394293, "clip_ratio/low_mean": 0.0011155190331919584, "clip_ratio/low_min": 9.688676800578833e-05, "clip_ratio/region_mean": 0.0020998722466174513, "epoch": 1.4666083406240886, "grad_norm": 0.17822666466236115, "learning_rate": 1e-06, "loss": -0.0733, "step": 628 }, { "clip_ratio/high_max": 0.0019807805183518212, "clip_ratio/high_mean": 0.0008326768383994931, "clip_ratio/low_mean": 0.00034694392343226355, "clip_ratio/low_min": 1.3994625987834297e-05, "clip_ratio/region_mean": 0.001179620736365905, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 1252.6484375, "completions/mean_terminated_length": 657.88525390625, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 1.468941382327209, "grad_norm": 0.3137721121311188, "learning_rate": 1e-06, "loss": -0.0701, "num_tokens": 94456689.0, "reward": 0.5290178656578064, "reward_std": 0.15665017068386078, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 629 }, { "clip_ratio/high_max": 0.00227245543646859, "clip_ratio/high_mean": 0.0009395831893925788, "clip_ratio/low_mean": 0.0005534355395866442, "clip_ratio/low_min": 2.0550196495605633e-05, "clip_ratio/region_mean": 0.0014930187171557918, "epoch": 1.4712744240303295, "grad_norm": 0.29060402512550354, "learning_rate": 1e-06, "loss": -0.0703, "step": 630 }, { "clip_ratio/high_max": 0.00282437232817756, "clip_ratio/high_mean": 0.0010493311092432123, "clip_ratio/low_mean": 0.0006986428397794953, "clip_ratio/low_min": 6.8500658017001115e-06, "clip_ratio/region_mean": 0.0017479739581176545, "epoch": 1.47360746573345, "grad_norm": 0.18267522752285004, "learning_rate": 1e-06, "loss": -0.0705, "step": 631 }, { "clip_ratio/high_max": 0.0026695308988564648, "clip_ratio/high_mean": 0.0010461520869284868, "clip_ratio/low_mean": 0.0008833047941152472, "clip_ratio/low_min": 4.795045970240608e-05, "clip_ratio/region_mean": 0.001929456844663946, "epoch": 1.4759405074365703, "grad_norm": 0.24536316096782684, "learning_rate": 1e-06, "loss": -0.0705, "step": 632 }, { "clip_ratio/high_max": 0.0018891414983954746, "clip_ratio/high_mean": 0.0008695932247064775, "clip_ratio/low_mean": 0.0004981261481589172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013677193237526808, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 1132.751220703125, "completions/mean_terminated_length": 670.1019897460938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.4782735491396908, "grad_norm": 0.3137512803077698, "learning_rate": 1e-06, "loss": -0.0655, "num_tokens": 95054562.0, "reward": 0.5881696939468384, "reward_std": 0.17784713208675385, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 633 }, { "clip_ratio/high_max": 0.0026375512752565555, "clip_ratio/high_mean": 0.0011333199290675111, "clip_ratio/low_mean": 0.0007825907032383839, "clip_ratio/low_min": 2.1226014723652042e-05, "clip_ratio/region_mean": 0.0019159107105224393, "epoch": 1.4806065908428114, "grad_norm": 0.23053784668445587, "learning_rate": 1e-06, "loss": -0.0658, "step": 634 }, { "clip_ratio/high_max": 0.002540815963584464, "clip_ratio/high_mean": 0.0011436544646130642, "clip_ratio/low_mean": 0.0009427386703464435, "clip_ratio/low_min": 1.0613007361826021e-05, "clip_ratio/region_mean": 0.0020863931204075925, "epoch": 1.4829396325459316, "grad_norm": 0.2093677967786789, "learning_rate": 1e-06, "loss": -0.0659, "step": 635 }, { "clip_ratio/high_max": 0.002613451928482391, "clip_ratio/high_mean": 0.0011095456502516754, "clip_ratio/low_mean": 0.0011048078104067827, "clip_ratio/low_min": 4.2452029447304085e-05, "clip_ratio/region_mean": 0.0022143534224596806, "epoch": 1.4852726742490523, "grad_norm": 0.1752631515264511, "learning_rate": 1e-06, "loss": -0.0661, "step": 636 }, { "clip_ratio/high_max": 0.0017787703473004512, "clip_ratio/high_mean": 0.0007677474686715868, "clip_ratio/low_mean": 0.000416842095546599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001184589524200419, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 1191.4888916015625, "completions/mean_terminated_length": 680.7218017578125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 1.4876057159521727, "grad_norm": 0.20887115597724915, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 95663376.0, "reward": 0.5848214626312256, "reward_std": 0.15582671761512756, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 637 }, { "clip_ratio/high_max": 0.0020016869930259418, "clip_ratio/high_mean": 0.0008568723087591934, "clip_ratio/low_mean": 0.0005421767809821176, "clip_ratio/low_min": 1.433815123164095e-05, "clip_ratio/region_mean": 0.0013990490406285971, "epoch": 1.4899387576552932, "grad_norm": 0.24404728412628174, "learning_rate": 1e-06, "loss": -0.0507, "step": 638 }, { "clip_ratio/high_max": 0.0021760556192020886, "clip_ratio/high_mean": 0.0009449577919440344, "clip_ratio/low_mean": 0.0007495746449421858, "clip_ratio/low_min": 4.012785393570084e-05, "clip_ratio/region_mean": 0.0016945324241532944, "epoch": 1.4922717993584136, "grad_norm": 0.190024733543396, "learning_rate": 1e-06, "loss": -0.0509, "step": 639 }, { "clip_ratio/high_max": 0.0021634527256537694, "clip_ratio/high_mean": 0.0009296696362071089, "clip_ratio/low_mean": 0.0008504205998178804, "clip_ratio/low_min": 3.849515451292973e-05, "clip_ratio/region_mean": 0.0017800902423914522, "epoch": 1.494604841061534, "grad_norm": 0.17286677658557892, "learning_rate": 1e-06, "loss": -0.0509, "step": 640 }, { "clip_ratio/high_max": 0.0019069292393396609, "clip_ratio/high_mean": 0.000733642582417815, "clip_ratio/low_mean": 0.0004784600541825057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012121026484237518, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2620.0, "completions/mean_length": 1143.93310546875, "completions/mean_terminated_length": 696.1902465820312, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.4969378827646544, "grad_norm": 0.2399642914533615, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 96287404.0, "reward": 0.5725446939468384, "reward_std": 0.15018516778945923, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 641 }, { "clip_ratio/high_max": 0.002361362647206988, "clip_ratio/high_mean": 0.0009156956712104147, "clip_ratio/low_mean": 0.0007014144121058052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016171101087820716, "epoch": 1.4992709244677749, "grad_norm": 0.2206823080778122, "learning_rate": 1e-06, "loss": -0.0395, "step": 642 }, { "clip_ratio/high_max": 0.0022667571320198476, "clip_ratio/high_mean": 0.0009398542388225906, "clip_ratio/low_mean": 0.0008125084077619249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001752362644765526, "epoch": 1.5016039661708953, "grad_norm": 0.2108108103275299, "learning_rate": 1e-06, "loss": -0.0396, "step": 643 }, { "clip_ratio/high_max": 0.002244658535346389, "clip_ratio/high_mean": 0.0008674152304593008, "clip_ratio/low_mean": 0.0009937553195413784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018611705527291633, "epoch": 1.5039370078740157, "grad_norm": 0.20998287200927734, "learning_rate": 1e-06, "loss": -0.0397, "step": 644 }, { "clip_ratio/high_max": 0.002750497020315379, "clip_ratio/high_mean": 0.001084525643818779, "clip_ratio/low_mean": 0.000655712927255081, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017402385419700295, "completions/clipped_ratio": 0.1897321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3668.0, "completions/mean_length": 1344.53125, "completions/mean_terminated_length": 700.2479248046875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.5062700495771362, "grad_norm": 0.2975298762321472, "learning_rate": 1e-06, "loss": -0.0639, "num_tokens": 96891224.0, "reward": 0.4933035969734192, "reward_std": 0.2048664689064026, "rewards/verify_math_reward/mean": 0.4933035671710968, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 645 }, { "clip_ratio/high_max": 0.003178340135491453, "clip_ratio/high_mean": 0.0013070478162262589, "clip_ratio/low_mean": 0.0008972011510195443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002204249001806602, "epoch": 1.5086030912802566, "grad_norm": 0.2677991986274719, "learning_rate": 1e-06, "loss": -0.0642, "step": 646 }, { "clip_ratio/high_max": 0.003427130046475213, "clip_ratio/high_mean": 0.0014177916236803867, "clip_ratio/low_mean": 0.0010971521915053017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025149437788059004, "epoch": 1.510936132983377, "grad_norm": 0.21310929954051971, "learning_rate": 1e-06, "loss": -0.0644, "step": 647 }, { "clip_ratio/high_max": 0.0029649059470102657, "clip_ratio/high_mean": 0.001309787250647787, "clip_ratio/low_mean": 0.0012619099179573823, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025716971649671905, "epoch": 1.5132691746864975, "grad_norm": 0.2274923026561737, "learning_rate": 1e-06, "loss": -0.0645, "step": 648 }, { "clip_ratio/high_max": 0.002781429029710125, "clip_ratio/high_mean": 0.001106526000512531, "clip_ratio/low_mean": 0.0006171891363919713, "clip_ratio/low_min": 3.19901673719869e-05, "clip_ratio/region_mean": 0.00172371514418046, "completions/clipped_ratio": 0.1953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3042.0, "completions/mean_length": 1327.8929443359375, "completions/mean_terminated_length": 656.022216796875, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 1.5156022163896181, "grad_norm": 0.3981626033782959, "learning_rate": 1e-06, "loss": -0.0705, "num_tokens": 97465832.0, "reward": 0.4977678656578064, "reward_std": 0.19670122861862183, "rewards/verify_math_reward/mean": 0.4977678656578064, "rewards/verify_math_reward/std": 0.5002742409706116, "step": 649 }, { "clip_ratio/high_max": 0.0034815168255590834, "clip_ratio/high_mean": 0.0014658952386525925, "clip_ratio/low_mean": 0.001077957467714441, "clip_ratio/low_min": 3.458586161286803e-05, "clip_ratio/region_mean": 0.002543852722737938, "epoch": 1.5179352580927383, "grad_norm": 0.34394311904907227, "learning_rate": 1e-06, "loss": -0.0708, "step": 650 }, { "clip_ratio/high_max": 0.00356647145235911, "clip_ratio/high_mean": 0.0014375105420185719, "clip_ratio/low_mean": 0.0013765477087872569, "clip_ratio/low_min": 2.7237243102717912e-05, "clip_ratio/region_mean": 0.0028140582362539135, "epoch": 1.520268299795859, "grad_norm": 0.34004664421081543, "learning_rate": 1e-06, "loss": -0.0711, "step": 651 }, { "clip_ratio/high_max": 0.003366349876159802, "clip_ratio/high_mean": 0.0014541119999194052, "clip_ratio/low_mean": 0.0016202135921048466, "clip_ratio/low_min": 3.19901673719869e-05, "clip_ratio/region_mean": 0.0030743255192646757, "epoch": 1.5226013414989792, "grad_norm": 0.2796070873737335, "learning_rate": 1e-06, "loss": -0.0712, "step": 652 }, { "clip_ratio/high_max": 0.0021140613534953445, "clip_ratio/high_mean": 0.0007855239236960188, "clip_ratio/low_mean": 0.0005137370571901556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012992609772481956, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 1189.734375, "completions/mean_terminated_length": 619.3458251953125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 1.5249343832020998, "grad_norm": 0.25421372056007385, "learning_rate": 1e-06, "loss": -0.069, "num_tokens": 98023210.0, "reward": 0.5267857313156128, "reward_std": 0.15134702622890472, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 653 }, { "clip_ratio/high_max": 0.002797064465994481, "clip_ratio/high_mean": 0.0010685569450288313, "clip_ratio/low_mean": 0.0007312407378776697, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001799797682906501, "epoch": 1.52726742490522, "grad_norm": 0.2175801396369934, "learning_rate": 1e-06, "loss": -0.0692, "step": 654 }, { "clip_ratio/high_max": 0.0028732655955536757, "clip_ratio/high_mean": 0.001066129141690908, "clip_ratio/low_mean": 0.0008831094664856209, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019492386272759177, "epoch": 1.5296004666083407, "grad_norm": 0.20748473703861237, "learning_rate": 1e-06, "loss": -0.0694, "step": 655 }, { "clip_ratio/high_max": 0.0029239171999506652, "clip_ratio/high_mean": 0.0010204350219282787, "clip_ratio/low_mean": 0.0011148508874612162, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021352858893806115, "epoch": 1.531933508311461, "grad_norm": 0.18779869377613068, "learning_rate": 1e-06, "loss": -0.0695, "step": 656 }, { "clip_ratio/high_max": 0.00239759481701185, "clip_ratio/high_mean": 0.0010145321357413195, "clip_ratio/low_mean": 0.0006554852961926372, "clip_ratio/low_min": 1.1144793461426161e-05, "clip_ratio/region_mean": 0.0016700173982826527, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3998.0, "completions/mean_length": 1311.765625, "completions/mean_terminated_length": 692.6248168945312, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.5342665500145816, "grad_norm": 0.283419668674469, "learning_rate": 1e-06, "loss": -0.1052, "num_tokens": 98622792.0, "reward": 0.5613839626312256, "reward_std": 0.19895784556865692, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 657 }, { "clip_ratio/high_max": 0.00306125360657461, "clip_ratio/high_mean": 0.0013394121815508697, "clip_ratio/low_mean": 0.0008616015857114689, "clip_ratio/low_min": 2.059986763924826e-05, "clip_ratio/region_mean": 0.00220101373270154, "epoch": 1.536599591717702, "grad_norm": 0.27444544434547424, "learning_rate": 1e-06, "loss": -0.1053, "step": 658 }, { "clip_ratio/high_max": 0.0033300267459708266, "clip_ratio/high_mean": 0.0012730805719911586, "clip_ratio/low_mean": 0.000994399935734691, "clip_ratio/low_min": 2.059986763924826e-05, "clip_ratio/region_mean": 0.0022674804786220193, "epoch": 1.5389326334208224, "grad_norm": 0.20978401601314545, "learning_rate": 1e-06, "loss": -0.1055, "step": 659 }, { "clip_ratio/high_max": 0.0031130103016039357, "clip_ratio/high_mean": 0.001303149856539676, "clip_ratio/low_mean": 0.0013752163686149288, "clip_ratio/low_min": 4.119973527849652e-05, "clip_ratio/region_mean": 0.0026783662542584352, "epoch": 1.5412656751239429, "grad_norm": 0.2526197135448456, "learning_rate": 1e-06, "loss": -0.1056, "step": 660 }, { "clip_ratio/high_max": 0.0021811685219290666, "clip_ratio/high_mean": 0.0008479951702611288, "clip_ratio/low_mean": 0.0005780810424766969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014260761854529846, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 1176.641845703125, "completions/mean_terminated_length": 594.3333129882812, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.5435987168270633, "grad_norm": 0.2901129126548767, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 99160319.0, "reward": 0.5691964626312256, "reward_std": 0.14368948340415955, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 661 }, { "clip_ratio/high_max": 0.0025631569806137122, "clip_ratio/high_mean": 0.0010572301398497075, "clip_ratio/low_mean": 0.0007841288665986212, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018413590514683165, "epoch": 1.5459317585301837, "grad_norm": 0.23473727703094482, "learning_rate": 1e-06, "loss": -0.0518, "step": 662 }, { "clip_ratio/high_max": 0.0025974552845582366, "clip_ratio/high_mean": 0.0010332157580705825, "clip_ratio/low_mean": 0.0009663371947681298, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00199955292919185, "epoch": 1.5482648002333042, "grad_norm": 0.2675100862979889, "learning_rate": 1e-06, "loss": -0.052, "step": 663 }, { "clip_ratio/high_max": 0.00247337876498932, "clip_ratio/high_mean": 0.0009737393938848982, "clip_ratio/low_mean": 0.0012372789251458016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022110183563199826, "epoch": 1.5505978419364246, "grad_norm": 0.18326008319854736, "learning_rate": 1e-06, "loss": -0.0521, "step": 664 }, { "clip_ratio/high_max": 0.002578391147835646, "clip_ratio/high_mean": 0.001082983690139372, "clip_ratio/low_mean": 0.0006028006682754494, "clip_ratio/low_min": 7.835025826352648e-06, "clip_ratio/region_mean": 0.0016857843802426942, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3464.0, "completions/mean_length": 1177.068115234375, "completions/mean_terminated_length": 645.653076171875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.552930883639545, "grad_norm": 0.31763285398483276, "learning_rate": 1e-06, "loss": -0.0767, "num_tokens": 99732628.0, "reward": 0.5881696939468384, "reward_std": 0.1853206306695938, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 665 }, { "clip_ratio/high_max": 0.0025883031557896174, "clip_ratio/high_mean": 0.0011414089749450795, "clip_ratio/low_mean": 0.0008020186633075355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019434275673120283, "epoch": 1.5552639253426657, "grad_norm": 0.2709817588329315, "learning_rate": 1e-06, "loss": -0.0769, "step": 666 }, { "clip_ratio/high_max": 0.00291282403486548, "clip_ratio/high_mean": 0.0012065014489053283, "clip_ratio/low_mean": 0.0009946562222467037, "clip_ratio/low_min": 7.835025826352648e-06, "clip_ratio/region_mean": 0.0022011576729710214, "epoch": 1.5575969670457859, "grad_norm": 0.2394454926252365, "learning_rate": 1e-06, "loss": -0.0772, "step": 667 }, { "clip_ratio/high_max": 0.002633153955684975, "clip_ratio/high_mean": 0.001112860440116492, "clip_ratio/low_mean": 0.0012822716889786534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002395132127276156, "epoch": 1.5599300087489065, "grad_norm": 0.22501912713050842, "learning_rate": 1e-06, "loss": -0.0773, "step": 668 }, { "clip_ratio/high_max": 0.0025296201347373426, "clip_ratio/high_mean": 0.0008875593648554059, "clip_ratio/low_mean": 0.00047986642357500386, "clip_ratio/low_min": 1.5363815691671334e-05, "clip_ratio/region_mean": 0.0013674257788807154, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3748.0, "completions/mean_length": 1283.4810791015625, "completions/mean_terminated_length": 713.4268798828125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 1.5622630504520267, "grad_norm": 0.2584800720214844, "learning_rate": 1e-06, "loss": -0.0459, "num_tokens": 100348475.0, "reward": 0.5491071939468384, "reward_std": 0.14849409461021423, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 669 }, { "clip_ratio/high_max": 0.0023968944551597815, "clip_ratio/high_mean": 0.0009445089151540742, "clip_ratio/low_mean": 0.0006333161124985054, "clip_ratio/low_min": 1.5363815691671334e-05, "clip_ratio/region_mean": 0.001577825049025705, "epoch": 1.5645960921551474, "grad_norm": 0.23188327252864838, "learning_rate": 1e-06, "loss": -0.046, "step": 670 }, { "clip_ratio/high_max": 0.00333152122766478, "clip_ratio/high_mean": 0.0010798854073073016, "clip_ratio/low_mean": 0.0007709232504566899, "clip_ratio/low_min": 3.16937112074811e-05, "clip_ratio/region_mean": 0.001850808686867822, "epoch": 1.5669291338582676, "grad_norm": 0.1995295137166977, "learning_rate": 1e-06, "loss": -0.0462, "step": 671 }, { "clip_ratio/high_max": 0.0030446229757217225, "clip_ratio/high_mean": 0.0010142238497792277, "clip_ratio/low_mean": 0.0009125107726504211, "clip_ratio/low_min": 7.923427801870275e-06, "clip_ratio/region_mean": 0.0019267346178821754, "epoch": 1.5692621755613883, "grad_norm": 0.18869951367378235, "learning_rate": 1e-06, "loss": -0.0463, "step": 672 }, { "clip_ratio/high_max": 0.0021819696048623882, "clip_ratio/high_mean": 0.0008233503558585653, "clip_ratio/low_mean": 0.0006891033935971791, "clip_ratio/low_min": 8.679349775775336e-06, "clip_ratio/region_mean": 0.001512453796749469, "completions/clipped_ratio": 0.1964285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3989.0, "completions/mean_length": 1307.53466796875, "completions/mean_terminated_length": 625.9097290039062, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.5715952172645085, "grad_norm": 0.34236404299736023, "learning_rate": 1e-06, "loss": -0.0669, "num_tokens": 100886930.0, "reward": 0.5491071939468384, "reward_std": 0.18595930933952332, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 673 }, { "clip_ratio/high_max": 0.003019629279151559, "clip_ratio/high_mean": 0.0011409016224206425, "clip_ratio/low_mean": 0.001045637009156053, "clip_ratio/low_min": 4.287306728656404e-05, "clip_ratio/region_mean": 0.0021865386806894094, "epoch": 1.5739282589676291, "grad_norm": 0.2636945843696594, "learning_rate": 1e-06, "loss": -0.0673, "step": 674 }, { "clip_ratio/high_max": 0.0031311263737734407, "clip_ratio/high_mean": 0.001144453988672467, "clip_ratio/low_mean": 0.0013594984848168679, "clip_ratio/low_min": 2.5514365916023962e-05, "clip_ratio/region_mean": 0.0025039524407475255, "epoch": 1.5762613006707495, "grad_norm": 0.24746200442314148, "learning_rate": 1e-06, "loss": -0.0675, "step": 675 }, { "clip_ratio/high_max": 0.002691456291358918, "clip_ratio/high_mean": 0.0010489637461432721, "clip_ratio/low_mean": 0.0015580262552248314, "clip_ratio/low_min": 4.2611225580913015e-05, "clip_ratio/region_mean": 0.0026069900413858704, "epoch": 1.57859434237387, "grad_norm": 0.24600592255592346, "learning_rate": 1e-06, "loss": -0.0676, "step": 676 }, { "clip_ratio/high_max": 0.0024583497288404033, "clip_ratio/high_mean": 0.0009814610311877914, "clip_ratio/low_mean": 0.000726520012904075, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017079810531868134, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 1136.7723388671875, "completions/mean_terminated_length": 674.7509765625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 1.5809273840769904, "grad_norm": 0.31176334619522095, "learning_rate": 1e-06, "loss": -0.0515, "num_tokens": 101499030.0, "reward": 0.546875, "reward_std": 0.188248410820961, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 677 }, { "clip_ratio/high_max": 0.0029717501529376023, "clip_ratio/high_mean": 0.0011934057911275886, "clip_ratio/low_mean": 0.0011119712435174733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002305377049196977, "epoch": 1.5832604257801108, "grad_norm": 0.27647027373313904, "learning_rate": 1e-06, "loss": -0.0518, "step": 678 }, { "clip_ratio/high_max": 0.003101067639363464, "clip_ratio/high_mean": 0.0012359481625026092, "clip_ratio/low_mean": 0.0013322515715117333, "clip_ratio/low_min": 2.84349407593254e-05, "clip_ratio/region_mean": 0.0025681997431092896, "epoch": 1.5855934674832313, "grad_norm": 0.25151047110557556, "learning_rate": 1e-06, "loss": -0.052, "step": 679 }, { "clip_ratio/high_max": 0.0029707483190577477, "clip_ratio/high_mean": 0.0011499096181069035, "clip_ratio/low_mean": 0.001467522828534129, "clip_ratio/low_min": 2.84349407593254e-05, "clip_ratio/region_mean": 0.002617432506667683, "epoch": 1.5879265091863517, "grad_norm": 0.23571977019309998, "learning_rate": 1e-06, "loss": -0.0521, "step": 680 }, { "clip_ratio/high_max": 0.002559652282798197, "clip_ratio/high_mean": 0.0010860990314540686, "clip_ratio/low_mean": 0.0007604357206218992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018465347384335473, "completions/clipped_ratio": 0.2008928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 1334.6796875, "completions/mean_terminated_length": 640.4929809570312, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 1.5902595508894721, "grad_norm": 0.29189226031303406, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 102048711.0, "reward": 0.5245535969734192, "reward_std": 0.1755138486623764, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 681 }, { "clip_ratio/high_max": 0.002816863951011328, "clip_ratio/high_mean": 0.0011096646394435083, "clip_ratio/low_mean": 0.0011371720538591035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002246836684207665, "epoch": 1.5925925925925926, "grad_norm": 0.25846952199935913, "learning_rate": 1e-06, "loss": -0.0749, "step": 682 }, { "clip_ratio/high_max": 0.003701198067574296, "clip_ratio/high_mean": 0.0013540929376176791, "clip_ratio/low_mean": 0.001283191236325365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026372841457487084, "epoch": 1.594925634295713, "grad_norm": 0.20855620503425598, "learning_rate": 1e-06, "loss": -0.0752, "step": 683 }, { "clip_ratio/high_max": 0.003204186574293999, "clip_ratio/high_mean": 0.001233112501722644, "clip_ratio/low_mean": 0.001455334067941294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026884465478360653, "epoch": 1.5972586759988334, "grad_norm": 0.21366524696350098, "learning_rate": 1e-06, "loss": -0.0752, "step": 684 }, { "clip_ratio/high_max": 0.0025050155745702796, "clip_ratio/high_mean": 0.0009705617230792996, "clip_ratio/low_mean": 0.0006182609540701378, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015888227062532678, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3316.0, "completions/mean_length": 1324.4910888671875, "completions/mean_terminated_length": 684.912109375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.599591717701954, "grad_norm": 0.2937885522842407, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 102639079.0, "reward": 0.5613839626312256, "reward_std": 0.20324954390525818, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 685 }, { "clip_ratio/high_max": 0.003114350685791578, "clip_ratio/high_mean": 0.001153579549281858, "clip_ratio/low_mean": 0.0008343398549186531, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019879194151144475, "epoch": 1.6019247594050743, "grad_norm": 0.22430022060871124, "learning_rate": 1e-06, "loss": -0.0603, "step": 686 }, { "clip_ratio/high_max": 0.002859890679246746, "clip_ratio/high_mean": 0.0012254288703843486, "clip_ratio/low_mean": 0.0010812852524395566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023067141009960324, "epoch": 1.604257801108195, "grad_norm": 0.2645739018917084, "learning_rate": 1e-06, "loss": -0.0604, "step": 687 }, { "clip_ratio/high_max": 0.0028396494963089935, "clip_ratio/high_mean": 0.001110363347834209, "clip_ratio/low_mean": 0.0012552885236800648, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023656518242205493, "epoch": 1.6065908428113151, "grad_norm": 0.25388264656066895, "learning_rate": 1e-06, "loss": -0.0604, "step": 688 }, { "clip_ratio/high_max": 0.002372793183894828, "clip_ratio/high_mean": 0.0010088597427966306, "clip_ratio/low_mean": 0.0006163730804473744, "clip_ratio/low_min": 4.205921868560836e-05, "clip_ratio/region_mean": 0.0016252328423433937, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3867.0, "completions/mean_length": 1164.2421875, "completions/mean_terminated_length": 625.9141845703125, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 1.6089238845144358, "grad_norm": 0.2801320552825928, "learning_rate": 1e-06, "loss": -0.0958, "num_tokens": 103193344.0, "reward": 0.5714285969734192, "reward_std": 0.19068947434425354, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 689 }, { "clip_ratio/high_max": 0.0032055882111308165, "clip_ratio/high_mean": 0.0013251628406578675, "clip_ratio/low_mean": 0.0007281958114617737, "clip_ratio/low_min": 1.842570782173425e-05, "clip_ratio/region_mean": 0.0020533585993689485, "epoch": 1.611256926217556, "grad_norm": 0.23484808206558228, "learning_rate": 1e-06, "loss": -0.096, "step": 690 }, { "clip_ratio/high_max": 0.0031799277930986136, "clip_ratio/high_mean": 0.0013861646511941217, "clip_ratio/low_mean": 0.0009580936939528328, "clip_ratio/low_min": 4.6064269554335624e-05, "clip_ratio/region_mean": 0.002344258398807142, "epoch": 1.6135899679206767, "grad_norm": 0.21078988909721375, "learning_rate": 1e-06, "loss": -0.0962, "step": 691 }, { "clip_ratio/high_max": 0.0031392294986289926, "clip_ratio/high_mean": 0.0013050017587374896, "clip_ratio/low_mean": 0.0011217644498628943, "clip_ratio/low_min": 4.6064269554335624e-05, "clip_ratio/region_mean": 0.002426766171993222, "epoch": 1.6159230096237969, "grad_norm": 0.23015913367271423, "learning_rate": 1e-06, "loss": -0.0962, "step": 692 }, { "clip_ratio/high_max": 0.0019772380546783097, "clip_ratio/high_mean": 0.0007786658425175119, "clip_ratio/low_mean": 0.0005568928290813346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001335558725259034, "completions/clipped_ratio": 0.2053571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3617.0, "completions/mean_length": 1319.727783203125, "completions/mean_terminated_length": 602.2640380859375, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 1.6182560513269175, "grad_norm": 0.30670201778411865, "learning_rate": 1e-06, "loss": -0.1051, "num_tokens": 103712468.0, "reward": 0.5502232313156128, "reward_std": 0.18032027781009674, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 693 }, { "clip_ratio/high_max": 0.0029129241302143782, "clip_ratio/high_mean": 0.0011786021677835379, "clip_ratio/low_mean": 0.0008487720242555952, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002027374204772059, "epoch": 1.620589093030038, "grad_norm": 0.3664519786834717, "learning_rate": 1e-06, "loss": -0.1054, "step": 694 }, { "clip_ratio/high_max": 0.002840742585249245, "clip_ratio/high_mean": 0.0011297030687273946, "clip_ratio/low_mean": 0.0009428865196241532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020725895592477173, "epoch": 1.6229221347331584, "grad_norm": 0.25414541363716125, "learning_rate": 1e-06, "loss": -0.1056, "step": 695 }, { "clip_ratio/high_max": 0.0028106456229579635, "clip_ratio/high_mean": 0.0011537669524841476, "clip_ratio/low_mean": 0.0011728714871424017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023266384232556447, "epoch": 1.6252551764362788, "grad_norm": 0.24437932670116425, "learning_rate": 1e-06, "loss": -0.1057, "step": 696 }, { "clip_ratio/high_max": 0.002399275341304019, "clip_ratio/high_mean": 0.0008898892119759694, "clip_ratio/low_mean": 0.000654937247873022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001544826453027781, "completions/clipped_ratio": 0.1975446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3361.0, "completions/mean_length": 1324.7366943359375, "completions/mean_terminated_length": 642.5202026367188, "completions/min_length": 196.0, "completions/min_terminated_length": 196.0, "epoch": 1.6275882181393992, "grad_norm": 0.31290677189826965, "learning_rate": 1e-06, "loss": -0.0769, "num_tokens": 104282808.0, "reward": 0.5223214626312256, "reward_std": 0.1779131293296814, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 697 }, { "clip_ratio/high_max": 0.0031805944818188436, "clip_ratio/high_mean": 0.0011535340945556527, "clip_ratio/low_mean": 0.0007945833312987816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019481174167594872, "epoch": 1.6299212598425197, "grad_norm": 0.23811165988445282, "learning_rate": 1e-06, "loss": -0.0771, "step": 698 }, { "clip_ratio/high_max": 0.0027703681626007892, "clip_ratio/high_mean": 0.0010411006078356877, "clip_ratio/low_mean": 0.0009474635662627406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019885641668224707, "epoch": 1.63225430154564, "grad_norm": 0.2243320345878601, "learning_rate": 1e-06, "loss": -0.0772, "step": 699 }, { "clip_ratio/high_max": 0.0030867442983435467, "clip_ratio/high_mean": 0.0010878828761633486, "clip_ratio/low_mean": 0.001150815664914262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002238698521978222, "epoch": 1.6345873432487605, "grad_norm": 0.18212801218032837, "learning_rate": 1e-06, "loss": -0.0774, "step": 700 }, { "clip_ratio/high_max": 0.0026252361203660257, "clip_ratio/high_mean": 0.0010942014032480074, "clip_ratio/low_mean": 0.0005422721651484608, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001636473556573037, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3663.0, "completions/mean_length": 1305.7723388671875, "completions/mean_terminated_length": 661.8736572265625, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 1.636920384951881, "grad_norm": 0.33585652709007263, "learning_rate": 1e-06, "loss": -0.0916, "num_tokens": 104854956.0, "reward": 0.5725446939468384, "reward_std": 0.1841912716627121, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 701 }, { "clip_ratio/high_max": 0.00304038253671024, "clip_ratio/high_mean": 0.0013526170332625043, "clip_ratio/low_mean": 0.0007281387952389196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020807558539672755, "epoch": 1.6392534266550016, "grad_norm": 0.24745501577854156, "learning_rate": 1e-06, "loss": -0.0918, "step": 702 }, { "clip_ratio/high_max": 0.002915855307946913, "clip_ratio/high_mean": 0.0012921984380227514, "clip_ratio/low_mean": 0.0009440897629247047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002236288222775329, "epoch": 1.6415864683581218, "grad_norm": 0.2093912959098816, "learning_rate": 1e-06, "loss": -0.0921, "step": 703 }, { "clip_ratio/high_max": 0.0027673358854372054, "clip_ratio/high_mean": 0.0013007218585698865, "clip_ratio/low_mean": 0.0010847195226233453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023854413448134437, "epoch": 1.6439195100612425, "grad_norm": 0.2719499170780182, "learning_rate": 1e-06, "loss": -0.0922, "step": 704 }, { "clip_ratio/high_max": 0.0023966357875906397, "clip_ratio/high_mean": 0.0009143912266154075, "clip_ratio/low_mean": 0.0007818282201697002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016962194422376342, "completions/clipped_ratio": 0.2165178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3769.0, "completions/mean_length": 1385.1864013671875, "completions/mean_terminated_length": 636.0441284179688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.6462525517643627, "grad_norm": 0.34167617559432983, "learning_rate": 1e-06, "loss": -0.0604, "num_tokens": 105392075.0, "reward": 0.4654017984867096, "reward_std": 0.19155599176883698, "rewards/verify_math_reward/mean": 0.4654017984867096, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 705 }, { "clip_ratio/high_max": 0.003016867267433554, "clip_ratio/high_mean": 0.0011957475217059255, "clip_ratio/low_mean": 0.0011352539168001385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023310014148592018, "epoch": 1.6485855934674833, "grad_norm": 0.300221711397171, "learning_rate": 1e-06, "loss": -0.0606, "step": 706 }, { "clip_ratio/high_max": 0.002963373059174046, "clip_ratio/high_mean": 0.0010386884860054124, "clip_ratio/low_mean": 0.0012534869038063334, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002292175340699032, "epoch": 1.6509186351706036, "grad_norm": 0.25997117161750793, "learning_rate": 1e-06, "loss": -0.0609, "step": 707 }, { "clip_ratio/high_max": 0.002642484017997049, "clip_ratio/high_mean": 0.001068166990080499, "clip_ratio/low_mean": 0.0016632107199257007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002731377651798539, "epoch": 1.6532516768737242, "grad_norm": 0.29943886399269104, "learning_rate": 1e-06, "loss": -0.0609, "step": 708 }, { "clip_ratio/high_max": 0.0024212583593907766, "clip_ratio/high_mean": 0.0010675480953068472, "clip_ratio/low_mean": 0.0004458831745068892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015134312634472735, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3461.0, "completions/mean_length": 1296.90185546875, "completions/mean_terminated_length": 660.39453125, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 1.6555847185768444, "grad_norm": 0.30853670835494995, "learning_rate": 1e-06, "loss": -0.0938, "num_tokens": 105957619.0, "reward": 0.5859375, "reward_std": 0.19959722459316254, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 709 }, { "clip_ratio/high_max": 0.003249817520554643, "clip_ratio/high_mean": 0.0013167445504222997, "clip_ratio/low_mean": 0.0007628459834450041, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020795905438717455, "epoch": 1.657917760279965, "grad_norm": 0.2825876772403717, "learning_rate": 1e-06, "loss": -0.0941, "step": 710 }, { "clip_ratio/high_max": 0.0033827013758127578, "clip_ratio/high_mean": 0.001281385702895932, "clip_ratio/low_mean": 0.0009322790992882801, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002213664738519583, "epoch": 1.6602508019830855, "grad_norm": 0.21970489621162415, "learning_rate": 1e-06, "loss": -0.0942, "step": 711 }, { "clip_ratio/high_max": 0.003062765797949396, "clip_ratio/high_mean": 0.00115851747978013, "clip_ratio/low_mean": 0.001118047950512846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022765654284739867, "epoch": 1.662583843686206, "grad_norm": 0.22429729998111725, "learning_rate": 1e-06, "loss": -0.0943, "step": 712 }, { "clip_ratio/high_max": 0.002440858632326126, "clip_ratio/high_mean": 0.0009094994184124516, "clip_ratio/low_mean": 0.0006458879433921538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015553873381577432, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3330.0, "completions/mean_length": 1200.372802734375, "completions/mean_terminated_length": 608.7930297851562, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.6649168853893264, "grad_norm": 0.3057929575443268, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 106492745.0, "reward": 0.5446428656578064, "reward_std": 0.18821631371974945, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 713 }, { "clip_ratio/high_max": 0.0029230081163404975, "clip_ratio/high_mean": 0.0011955939498875523, "clip_ratio/low_mean": 0.0008712396156624891, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020668336073867977, "epoch": 1.6672499270924468, "grad_norm": 0.23520135879516602, "learning_rate": 1e-06, "loss": -0.0552, "step": 714 }, { "clip_ratio/high_max": 0.0028584764659171924, "clip_ratio/high_mean": 0.0011451138452684972, "clip_ratio/low_mean": 0.0010565156408119947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022016294751665555, "epoch": 1.6695829687955672, "grad_norm": 0.23191626369953156, "learning_rate": 1e-06, "loss": -0.0554, "step": 715 }, { "clip_ratio/high_max": 0.0028426450662664138, "clip_ratio/high_mean": 0.0010478730182512663, "clip_ratio/low_mean": 0.001281688870221842, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002329561953956727, "epoch": 1.6719160104986877, "grad_norm": 0.24943673610687256, "learning_rate": 1e-06, "loss": -0.0555, "step": 716 }, { "clip_ratio/high_max": 0.0026324251739424653, "clip_ratio/high_mean": 0.0009574365285516251, "clip_ratio/low_mean": 0.0007995056948857382, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001756942248903215, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 1261.8951416015625, "completions/mean_terminated_length": 607.8709106445312, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 1.674249052201808, "grad_norm": 0.3139522969722748, "learning_rate": 1e-06, "loss": -0.0796, "num_tokens": 107022179.0, "reward": 0.5558035969734192, "reward_std": 0.18125459551811218, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 717 }, { "clip_ratio/high_max": 0.003595037505874643, "clip_ratio/high_mean": 0.0011868317760672653, "clip_ratio/low_mean": 0.0010414816260890802, "clip_ratio/low_min": 2.4811433831928298e-05, "clip_ratio/region_mean": 0.0022283133985183667, "epoch": 1.6765820939049285, "grad_norm": 0.28548964858055115, "learning_rate": 1e-06, "loss": -0.08, "step": 718 }, { "clip_ratio/high_max": 0.0035372318197914865, "clip_ratio/high_mean": 0.001203651931064087, "clip_ratio/low_mean": 0.0012385282934701536, "clip_ratio/low_min": 5.047351623943541e-05, "clip_ratio/region_mean": 0.002442180208163336, "epoch": 1.678915135608049, "grad_norm": 0.24397175014019012, "learning_rate": 1e-06, "loss": -0.0802, "step": 719 }, { "clip_ratio/high_max": 0.0033241948622162454, "clip_ratio/high_mean": 0.0010891852689383086, "clip_ratio/low_mean": 0.0014839978211966809, "clip_ratio/low_min": 3.90482464354136e-05, "clip_ratio/region_mean": 0.0025731830755830742, "epoch": 1.6812481773111694, "grad_norm": 0.2252047061920166, "learning_rate": 1e-06, "loss": -0.0802, "step": 720 }, { "clip_ratio/high_max": 0.0020865477781626396, "clip_ratio/high_mean": 0.0007889104326750385, "clip_ratio/low_mean": 0.0005723328638396197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013612432612717384, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3685.0, "completions/mean_length": 1201.063720703125, "completions/mean_terminated_length": 614.3047485351562, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 1.68358121901429, "grad_norm": 0.4459717273712158, "learning_rate": 1e-06, "loss": -0.072, "num_tokens": 107562684.0, "reward": 0.5803571939468384, "reward_std": 0.17078480124473572, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 721 }, { "clip_ratio/high_max": 0.002799970516207395, "clip_ratio/high_mean": 0.0010848278343473794, "clip_ratio/low_mean": 0.0008371281196559721, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019219559399061836, "epoch": 1.6859142607174102, "grad_norm": 0.2520245909690857, "learning_rate": 1e-06, "loss": -0.0723, "step": 722 }, { "clip_ratio/high_max": 0.002626189547299873, "clip_ratio/high_mean": 0.0010460953490110114, "clip_ratio/low_mean": 0.000988977945780789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002035073241131613, "epoch": 1.688247302420531, "grad_norm": 0.2172216922044754, "learning_rate": 1e-06, "loss": -0.0725, "step": 723 }, { "clip_ratio/high_max": 0.0027866967429872602, "clip_ratio/high_mean": 0.0009972511488740565, "clip_ratio/low_mean": 0.0012106410231353948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022078921720094513, "epoch": 1.690580344123651, "grad_norm": 0.23183082044124603, "learning_rate": 1e-06, "loss": -0.0725, "step": 724 }, { "clip_ratio/high_max": 0.0023170647109509446, "clip_ratio/high_mean": 0.0008763284749875311, "clip_ratio/low_mean": 0.0005670832501891709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014434117256314494, "completions/clipped_ratio": 0.2165178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3758.0, "completions/mean_length": 1413.1998291015625, "completions/mean_terminated_length": 671.7991333007812, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 1.6929133858267718, "grad_norm": 0.30320650339126587, "learning_rate": 1e-06, "loss": -0.0736, "num_tokens": 108138791.0, "reward": 0.4910714626312256, "reward_std": 0.16653545200824738, "rewards/verify_math_reward/mean": 0.4910714328289032, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 725 }, { "clip_ratio/high_max": 0.003044892131583765, "clip_ratio/high_mean": 0.0011829018894786714, "clip_ratio/low_mean": 0.0008012801918084733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019841820576402824, "epoch": 1.695246427529892, "grad_norm": 0.2436947077512741, "learning_rate": 1e-06, "loss": -0.0739, "step": 726 }, { "clip_ratio/high_max": 0.0032057481730589643, "clip_ratio/high_mean": 0.0011772155739890877, "clip_ratio/low_mean": 0.0009244151933671674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021016307546233293, "epoch": 1.6975794692330126, "grad_norm": 0.21844151616096497, "learning_rate": 1e-06, "loss": -0.074, "step": 727 }, { "clip_ratio/high_max": 0.002946708300441969, "clip_ratio/high_mean": 0.0011590781032282393, "clip_ratio/low_mean": 0.0011810414580395445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002340119535801932, "epoch": 1.6999125109361328, "grad_norm": 0.19436538219451904, "learning_rate": 1e-06, "loss": -0.0741, "step": 728 }, { "clip_ratio/high_max": 0.002840550339897163, "clip_ratio/high_mean": 0.0010615747232805006, "clip_ratio/low_mean": 0.0006888908933433413, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001750465566146886, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3706.0, "completions/mean_length": 1066.88623046875, "completions/mean_terminated_length": 584.8926391601562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 1.7022455526392535, "grad_norm": 0.3603469431400299, "learning_rate": 1e-06, "loss": -0.0625, "num_tokens": 108686689.0, "reward": 0.6272321939468384, "reward_std": 0.1584189236164093, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 729 }, { "clip_ratio/high_max": 0.00358756499554147, "clip_ratio/high_mean": 0.001336411858574138, "clip_ratio/low_mean": 0.001047273280164518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023836851287342142, "epoch": 1.704578594342374, "grad_norm": 0.3039668798446655, "learning_rate": 1e-06, "loss": -0.0628, "step": 730 }, { "clip_ratio/high_max": 0.0037036568028270267, "clip_ratio/high_mean": 0.001329478091065539, "clip_ratio/low_mean": 0.0013035761201081186, "clip_ratio/low_min": 1.1415525477787014e-05, "clip_ratio/region_mean": 0.00263305424596183, "epoch": 1.7069116360454943, "grad_norm": 0.27185073494911194, "learning_rate": 1e-06, "loss": -0.0629, "step": 731 }, { "clip_ratio/high_max": 0.0032777939195511863, "clip_ratio/high_mean": 0.001269702181161847, "clip_ratio/low_mean": 0.0014182024151523365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026879046636167914, "epoch": 1.7092446777486148, "grad_norm": 0.2635992765426636, "learning_rate": 1e-06, "loss": -0.063, "step": 732 }, { "clip_ratio/high_max": 0.0027937005579588003, "clip_ratio/high_mean": 0.0009766038310772274, "clip_ratio/low_mean": 0.0004562619997159345, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014328658253361937, "completions/clipped_ratio": 0.2087053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 1386.69873046875, "completions/mean_terminated_length": 672.1156616210938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.7115777194517352, "grad_norm": 0.2532927393913269, "learning_rate": 1e-06, "loss": -0.1131, "num_tokens": 109256067.0, "reward": 0.5223214626312256, "reward_std": 0.16014528274536133, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 733 }, { "clip_ratio/high_max": 0.003094894185778685, "clip_ratio/high_mean": 0.0010996055279974826, "clip_ratio/low_mean": 0.0005157897130629863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016153952456079423, "epoch": 1.7139107611548556, "grad_norm": 0.27027636766433716, "learning_rate": 1e-06, "loss": -0.1133, "step": 734 }, { "clip_ratio/high_max": 0.0035217979311710224, "clip_ratio/high_mean": 0.0012316461052250816, "clip_ratio/low_mean": 0.0007063940056468709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019380400262889452, "epoch": 1.716243802857976, "grad_norm": 0.20090660452842712, "learning_rate": 1e-06, "loss": -0.1135, "step": 735 }, { "clip_ratio/high_max": 0.0032341975238523446, "clip_ratio/high_mean": 0.001142957709816983, "clip_ratio/low_mean": 0.0009098773225559853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020528350796666928, "epoch": 1.7185768445610965, "grad_norm": 0.19518056511878967, "learning_rate": 1e-06, "loss": -0.1136, "step": 736 }, { "clip_ratio/high_max": 0.0020187238551443443, "clip_ratio/high_mean": 0.0007821783510735258, "clip_ratio/low_mean": 0.000512348198753898, "clip_ratio/low_min": 1.1285662367299665e-05, "clip_ratio/region_mean": 0.001294526558922371, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 1289.1820068359375, "completions/mean_terminated_length": 650.919189453125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.720909886264217, "grad_norm": 0.27669376134872437, "learning_rate": 1e-06, "loss": -0.0785, "num_tokens": 109817022.0, "reward": 0.5111607313156128, "reward_std": 0.16732315719127655, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 737 }, { "clip_ratio/high_max": 0.0022142234665807337, "clip_ratio/high_mean": 0.000889992157681263, "clip_ratio/low_mean": 0.0007480007707272307, "clip_ratio/low_min": 2.4206041416618973e-05, "clip_ratio/region_mean": 0.00163799295478384, "epoch": 1.7232429279673376, "grad_norm": 0.26892563700675964, "learning_rate": 1e-06, "loss": -0.0787, "step": 738 }, { "clip_ratio/high_max": 0.002578092593466863, "clip_ratio/high_mean": 0.0010435152689751703, "clip_ratio/low_mean": 0.0008472703430015827, "clip_ratio/low_min": 2.257132473459933e-05, "clip_ratio/region_mean": 0.0018907856065197848, "epoch": 1.7255759696704578, "grad_norm": 0.1900540441274643, "learning_rate": 1e-06, "loss": -0.079, "step": 739 }, { "clip_ratio/high_max": 0.0020848819622187875, "clip_ratio/high_mean": 0.0008317502379213693, "clip_ratio/low_mean": 0.001126940247559105, "clip_ratio/low_min": 3.3856988011393696e-05, "clip_ratio/region_mean": 0.0019586904818424955, "epoch": 1.7279090113735784, "grad_norm": 0.1855372041463852, "learning_rate": 1e-06, "loss": -0.079, "step": 740 }, { "clip_ratio/high_max": 0.002750308034592308, "clip_ratio/high_mean": 0.001090041017960175, "clip_ratio/low_mean": 0.0005552698330575367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016453108364657965, "completions/clipped_ratio": 0.2209821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3175.0, "completions/mean_length": 1397.0457763671875, "completions/mean_terminated_length": 631.4398803710938, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 1.7302420530766986, "grad_norm": 0.37509623169898987, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 110349615.0, "reward": 0.4687500298023224, "reward_std": 0.164472758769989, "rewards/verify_math_reward/mean": 0.46875, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 741 }, { "clip_ratio/high_max": 0.00314707585494034, "clip_ratio/high_mean": 0.001330789462372195, "clip_ratio/low_mean": 0.0007947428930492606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021255323226796463, "epoch": 1.7325750947798193, "grad_norm": 0.2796333134174347, "learning_rate": 1e-06, "loss": -0.0534, "step": 742 }, { "clip_ratio/high_max": 0.003252979200624395, "clip_ratio/high_mean": 0.0013329988105397206, "clip_ratio/low_mean": 0.0010988753110723337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002431874083413277, "epoch": 1.7349081364829395, "grad_norm": 0.21326813101768494, "learning_rate": 1e-06, "loss": -0.0536, "step": 743 }, { "clip_ratio/high_max": 0.003149357784423046, "clip_ratio/high_mean": 0.0013002731720916927, "clip_ratio/low_mean": 0.0012321688736847136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025324420566903427, "epoch": 1.7372411781860602, "grad_norm": 0.2357005923986435, "learning_rate": 1e-06, "loss": -0.0537, "step": 744 }, { "clip_ratio/high_max": 0.0026869889843510464, "clip_ratio/high_mean": 0.0009339023745269515, "clip_ratio/low_mean": 0.00044741597776010167, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013813183722959366, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2385.0, "completions/mean_length": 1078.5848388671875, "completions/mean_terminated_length": 607.478759765625, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 1.7395742198891804, "grad_norm": 0.3794609606266022, "learning_rate": 1e-06, "loss": -0.0508, "num_tokens": 110908155.0, "reward": 0.6272321939468384, "reward_std": 0.16160815954208374, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 745 }, { "clip_ratio/high_max": 0.0030135468659864273, "clip_ratio/high_mean": 0.0011095243444287917, "clip_ratio/low_mean": 0.000746576579331304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018561009055702016, "epoch": 1.741907261592301, "grad_norm": 0.25194844603538513, "learning_rate": 1e-06, "loss": -0.0512, "step": 746 }, { "clip_ratio/high_max": 0.003536265438015107, "clip_ratio/high_mean": 0.0012160383303125855, "clip_ratio/low_mean": 0.0007701759050178225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019862142580677755, "epoch": 1.7442403032954215, "grad_norm": 0.2876931428909302, "learning_rate": 1e-06, "loss": -0.0513, "step": 747 }, { "clip_ratio/high_max": 0.003463274500973057, "clip_ratio/high_mean": 0.0012149306567152962, "clip_ratio/low_mean": 0.0010488517327758018, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002263782385853119, "epoch": 1.7465733449985419, "grad_norm": 0.21975436806678772, "learning_rate": 1e-06, "loss": -0.0515, "step": 748 }, { "clip_ratio/high_max": 0.0023062533073243685, "clip_ratio/high_mean": 0.0008066467999015003, "clip_ratio/low_mean": 0.0005605802763284373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013672271052200813, "completions/clipped_ratio": 0.2254464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3654.0, "completions/mean_length": 1421.69873046875, "completions/mean_terminated_length": 643.2996826171875, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 1.7489063867016623, "grad_norm": 0.3322184681892395, "learning_rate": 1e-06, "loss": -0.0811, "num_tokens": 111445621.0, "reward": 0.53125, "reward_std": 0.1724012941122055, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 749 }, { "clip_ratio/high_max": 0.003089862053457182, "clip_ratio/high_mean": 0.0010738891160144703, "clip_ratio/low_mean": 0.0006956881975384022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017695772912702523, "epoch": 1.7512394284047827, "grad_norm": 0.3031192421913147, "learning_rate": 1e-06, "loss": -0.0814, "step": 750 }, { "clip_ratio/high_max": 0.0025417940341867507, "clip_ratio/high_mean": 0.0009615219769329997, "clip_ratio/low_mean": 0.0009298260465584463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001891348001663573, "epoch": 1.7535724701079032, "grad_norm": 0.21803531050682068, "learning_rate": 1e-06, "loss": -0.0816, "step": 751 }, { "clip_ratio/high_max": 0.0026612037072482053, "clip_ratio/high_mean": 0.0009760259836184559, "clip_ratio/low_mean": 0.001015680973068811, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001991706929402426, "epoch": 1.7559055118110236, "grad_norm": 0.224185049533844, "learning_rate": 1e-06, "loss": -0.0817, "step": 752 }, { "clip_ratio/high_max": 0.002623805237817578, "clip_ratio/high_mean": 0.0009534142445772886, "clip_ratio/low_mean": 0.0004241333190293517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013775475563306827, "completions/clipped_ratio": 0.2053571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 1335.7723388671875, "completions/mean_terminated_length": 622.455078125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.758238553514144, "grad_norm": 0.30206793546676636, "learning_rate": 1e-06, "loss": -0.0629, "num_tokens": 111985497.0, "reward": 0.5569196939468384, "reward_std": 0.1587110459804535, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 753 }, { "clip_ratio/high_max": 0.0035839638221659698, "clip_ratio/high_mean": 0.0013280071470944677, "clip_ratio/low_mean": 0.0007448215110343881, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020728286472149193, "epoch": 1.7605715952172645, "grad_norm": 0.2561754286289215, "learning_rate": 1e-06, "loss": -0.0632, "step": 754 }, { "clip_ratio/high_max": 0.003633407446614001, "clip_ratio/high_mean": 0.001320830335316714, "clip_ratio/low_mean": 0.0008764087033341639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021972390604787506, "epoch": 1.762904636920385, "grad_norm": 0.23033028841018677, "learning_rate": 1e-06, "loss": -0.0634, "step": 755 }, { "clip_ratio/high_max": 0.0034799278510035947, "clip_ratio/high_mean": 0.0012372326200420503, "clip_ratio/low_mean": 0.0010334521684853826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022706847521476448, "epoch": 1.7652376786235053, "grad_norm": 0.221846804022789, "learning_rate": 1e-06, "loss": -0.0634, "step": 756 }, { "clip_ratio/high_max": 0.002665332518517971, "clip_ratio/high_mean": 0.0010277658730046824, "clip_ratio/low_mean": 0.0007625566595379496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017903225743793882, "completions/clipped_ratio": 0.1908482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4020.0, "completions/mean_length": 1378.73779296875, "completions/mean_terminated_length": 737.838623046875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.767570720326626, "grad_norm": 0.3178108036518097, "learning_rate": 1e-06, "loss": -0.0708, "num_tokens": 112605214.0, "reward": 0.5334821939468384, "reward_std": 0.20185355842113495, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 757 }, { "clip_ratio/high_max": 0.0033235828086617403, "clip_ratio/high_mean": 0.0011530254741956014, "clip_ratio/low_mean": 0.0010216206592303934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002174646080675302, "epoch": 1.7699037620297462, "grad_norm": 0.26831531524658203, "learning_rate": 1e-06, "loss": -0.071, "step": 758 }, { "clip_ratio/high_max": 0.0035036832850892097, "clip_ratio/high_mean": 0.0012240288524481002, "clip_ratio/low_mean": 0.0011186093088326743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023426382322213612, "epoch": 1.7722368037328668, "grad_norm": 0.2287055402994156, "learning_rate": 1e-06, "loss": -0.0713, "step": 759 }, { "clip_ratio/high_max": 0.002821169931848999, "clip_ratio/high_mean": 0.0010705337044782937, "clip_ratio/low_mean": 0.0013855133893230231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024560470701544546, "epoch": 1.774569845435987, "grad_norm": 0.2927832305431366, "learning_rate": 1e-06, "loss": -0.0713, "step": 760 }, { "clip_ratio/high_max": 0.0017815241080825217, "clip_ratio/high_mean": 0.0006108197103458224, "clip_ratio/low_mean": 0.000504789546084794, "clip_ratio/low_min": 1.4180373909766786e-05, "clip_ratio/region_mean": 0.0011156092550663743, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3427.0, "completions/mean_length": 1096.298095703125, "completions/mean_terminated_length": 632.426513671875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.7769028871391077, "grad_norm": 0.28425121307373047, "learning_rate": 1e-06, "loss": -0.0622, "num_tokens": 113186329.0, "reward": 0.5959821939468384, "reward_std": 0.14324188232421875, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 761 }, { "clip_ratio/high_max": 0.002119680393661838, "clip_ratio/high_mean": 0.0007032340054138331, "clip_ratio/low_mean": 0.0007346917317363477, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014379257372638676, "epoch": 1.779235928842228, "grad_norm": 0.18750505149364471, "learning_rate": 1e-06, "loss": -0.0625, "step": 762 }, { "clip_ratio/high_max": 0.0024135289568221197, "clip_ratio/high_mean": 0.0008099280639726203, "clip_ratio/low_mean": 0.0007933280356837713, "clip_ratio/low_min": 2.127056177414488e-05, "clip_ratio/region_mean": 0.00160325612523593, "epoch": 1.7815689705453486, "grad_norm": 0.24141138792037964, "learning_rate": 1e-06, "loss": -0.0626, "step": 763 }, { "clip_ratio/high_max": 0.0019172006941516884, "clip_ratio/high_mean": 0.0007264084797498072, "clip_ratio/low_mean": 0.0009916465546666586, "clip_ratio/low_min": 2.127056177414488e-05, "clip_ratio/region_mean": 0.001718055016681319, "epoch": 1.7839020122484688, "grad_norm": 0.17011037468910217, "learning_rate": 1e-06, "loss": -0.0627, "step": 764 }, { "clip_ratio/high_max": 0.0029642568333656527, "clip_ratio/high_mean": 0.0011997792062174994, "clip_ratio/low_mean": 0.0006717288924846798, "clip_ratio/low_min": 1.4169122550811153e-05, "clip_ratio/region_mean": 0.0018715080877882428, "completions/clipped_ratio": 0.2087053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 1340.204345703125, "completions/mean_terminated_length": 613.3582763671875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.7862350539515894, "grad_norm": 0.3163980543613434, "learning_rate": 1e-06, "loss": -0.124, "num_tokens": 113704848.0, "reward": 0.5424107313156128, "reward_std": 0.20662352442741394, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 765 }, { "clip_ratio/high_max": 0.003829695677268319, "clip_ratio/high_mean": 0.00148837465167162, "clip_ratio/low_mean": 0.000826685465654009, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002315060126420576, "epoch": 1.7885680956547099, "grad_norm": 0.3120717406272888, "learning_rate": 1e-06, "loss": -0.1242, "step": 766 }, { "clip_ratio/high_max": 0.003974874533014372, "clip_ratio/high_mean": 0.001538831988000311, "clip_ratio/low_mean": 0.0011645123013295233, "clip_ratio/low_min": 2.295262493134942e-05, "clip_ratio/region_mean": 0.0027033442820538767, "epoch": 1.7909011373578303, "grad_norm": 0.26164695620536804, "learning_rate": 1e-06, "loss": -0.1245, "step": 767 }, { "clip_ratio/high_max": 0.003672715203720145, "clip_ratio/high_mean": 0.001413390549714677, "clip_ratio/low_mean": 0.0012837811300414614, "clip_ratio/low_min": 4.620502659236081e-05, "clip_ratio/region_mean": 0.0026971717015840113, "epoch": 1.7932341790609507, "grad_norm": 0.2773039638996124, "learning_rate": 1e-06, "loss": -0.1245, "step": 768 }, { "clip_ratio/high_max": 0.002676884803804569, "clip_ratio/high_mean": 0.0009917986153595848, "clip_ratio/low_mean": 0.0005035521880927263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014953507634345442, "completions/clipped_ratio": 0.2142857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3903.0, "completions/mean_length": 1422.67529296875, "completions/mean_terminated_length": 693.586669921875, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 1.7955672207640712, "grad_norm": 0.3097780644893646, "learning_rate": 1e-06, "loss": -0.1124, "num_tokens": 114285629.0, "reward": 0.4765625298023224, "reward_std": 0.16796325147151947, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 769 }, { "clip_ratio/high_max": 0.0027745560291805305, "clip_ratio/high_mean": 0.0010920167151198257, "clip_ratio/low_mean": 0.0006124778379898999, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017044945670932066, "epoch": 1.7979002624671916, "grad_norm": 0.24015967547893524, "learning_rate": 1e-06, "loss": -0.1124, "step": 770 }, { "clip_ratio/high_max": 0.0031230877866619267, "clip_ratio/high_mean": 0.0010731148413469782, "clip_ratio/low_mean": 0.0008133408282446908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001886455633211881, "epoch": 1.800233304170312, "grad_norm": 0.20473533868789673, "learning_rate": 1e-06, "loss": -0.1127, "step": 771 }, { "clip_ratio/high_max": 0.0028612736641662195, "clip_ratio/high_mean": 0.0010856566805159673, "clip_ratio/low_mean": 0.0009701266862975899, "clip_ratio/low_min": 1.3994625987834297e-05, "clip_ratio/region_mean": 0.002055783334071748, "epoch": 1.8025663458734325, "grad_norm": 0.21435610949993134, "learning_rate": 1e-06, "loss": -0.1127, "step": 772 }, { "clip_ratio/high_max": 0.0022681507507513743, "clip_ratio/high_mean": 0.0008565066564187873, "clip_ratio/low_mean": 0.0005312314214052094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013877380770281889, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 1162.469970703125, "completions/mean_terminated_length": 628.3970947265625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 1.8048993875765529, "grad_norm": 0.3202595114707947, "learning_rate": 1e-06, "loss": -0.0719, "num_tokens": 114847410.0, "reward": 0.5758928656578064, "reward_std": 0.15744182467460632, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448275566101074, "step": 773 }, { "clip_ratio/high_max": 0.0029461818994604982, "clip_ratio/high_mean": 0.0010305059331585653, "clip_ratio/low_mean": 0.0007527953118824371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001783301260729786, "epoch": 1.8072324292796735, "grad_norm": 0.24247294664382935, "learning_rate": 1e-06, "loss": -0.0721, "step": 774 }, { "clip_ratio/high_max": 0.0029159980040276423, "clip_ratio/high_mean": 0.0010033698526967783, "clip_ratio/low_mean": 0.0007841212382118101, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017874911063699983, "epoch": 1.8095654709827937, "grad_norm": 0.19409358501434326, "learning_rate": 1e-06, "loss": -0.0722, "step": 775 }, { "clip_ratio/high_max": 0.0026246054694638588, "clip_ratio/high_mean": 0.0009365742735099047, "clip_ratio/low_mean": 0.0010685592246773012, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020051334613526706, "epoch": 1.8118985126859144, "grad_norm": 0.21823573112487793, "learning_rate": 1e-06, "loss": -0.0723, "step": 776 }, { "clip_ratio/high_max": 0.0020401036454131827, "clip_ratio/high_mean": 0.0008350547459485824, "clip_ratio/low_mean": 0.0006044626568382228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014395173820958007, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3060.0, "completions/mean_length": 1199.04248046875, "completions/mean_terminated_length": 625.8475952148438, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 1.8142315543890346, "grad_norm": 0.2843477725982666, "learning_rate": 1e-06, "loss": -0.0601, "num_tokens": 115399928.0, "reward": 0.5368303656578064, "reward_std": 0.1537223905324936, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 777 }, { "clip_ratio/high_max": 0.002548430689785164, "clip_ratio/high_mean": 0.0010934012916550273, "clip_ratio/low_mean": 0.0008658578972244868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019592591634136625, "epoch": 1.8165645960921553, "grad_norm": 0.24737218022346497, "learning_rate": 1e-06, "loss": -0.0603, "step": 778 }, { "clip_ratio/high_max": 0.002447107202897314, "clip_ratio/high_mean": 0.001076182052202057, "clip_ratio/low_mean": 0.0011107966804502212, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002186978621466551, "epoch": 1.8188976377952755, "grad_norm": 0.21529598534107208, "learning_rate": 1e-06, "loss": -0.0605, "step": 779 }, { "clip_ratio/high_max": 0.0026493881123315077, "clip_ratio/high_mean": 0.001074210747901816, "clip_ratio/low_mean": 0.001178176313487711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022523870793520473, "epoch": 1.8212306794983961, "grad_norm": 0.22326554358005524, "learning_rate": 1e-06, "loss": -0.0605, "step": 780 }, { "clip_ratio/high_max": 0.0021476892434293404, "clip_ratio/high_mean": 0.0007788121292833239, "clip_ratio/low_mean": 0.00047618040298402775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012549925268103834, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 1207.443115234375, "completions/mean_terminated_length": 663.4442749023438, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.8235637212015163, "grad_norm": 0.27336016297340393, "learning_rate": 1e-06, "loss": -0.1015, "num_tokens": 115983037.0, "reward": 0.5870535969734192, "reward_std": 0.16326561570167542, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 781 }, { "clip_ratio/high_max": 0.002120119155733846, "clip_ratio/high_mean": 0.0009483648700552294, "clip_ratio/low_mean": 0.000684274284139974, "clip_ratio/low_min": 1.6382700778194703e-05, "clip_ratio/region_mean": 0.0016326391778420657, "epoch": 1.825896762904637, "grad_norm": 0.19790448248386383, "learning_rate": 1e-06, "loss": -0.1017, "step": 782 }, { "clip_ratio/high_max": 0.0025395368284080178, "clip_ratio/high_mean": 0.0008975396103778621, "clip_ratio/low_mean": 0.0007989680589162163, "clip_ratio/low_min": 3.2765401556389406e-05, "clip_ratio/region_mean": 0.0016965076647466049, "epoch": 1.8282298046077574, "grad_norm": 0.18685676157474518, "learning_rate": 1e-06, "loss": -0.1018, "step": 783 }, { "clip_ratio/high_max": 0.002357502049562754, "clip_ratio/high_mean": 0.000903857222510851, "clip_ratio/low_mean": 0.0008553564457542961, "clip_ratio/low_min": 2.734033296292182e-05, "clip_ratio/region_mean": 0.001759213650075253, "epoch": 1.8305628463108778, "grad_norm": 0.22967809438705444, "learning_rate": 1e-06, "loss": -0.1018, "step": 784 }, { "clip_ratio/high_max": 0.0024036451359279454, "clip_ratio/high_mean": 0.000801718235379667, "clip_ratio/low_mean": 0.0007686801855015801, "clip_ratio/low_min": 1.05645704024937e-05, "clip_ratio/region_mean": 0.0015703984281572048, "completions/clipped_ratio": 0.2120535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 1436.62841796875, "completions/mean_terminated_length": 720.9334106445312, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 1.8328958880139983, "grad_norm": 0.35223352909088135, "learning_rate": 1e-06, "loss": -0.0373, "num_tokens": 116592824.0, "reward": 0.4598214626312256, "reward_std": 0.1575952023267746, "rewards/verify_math_reward/mean": 0.4598214328289032, "rewards/verify_math_reward/std": 0.4986613988876343, "step": 785 }, { "clip_ratio/high_max": 0.0029588468387373723, "clip_ratio/high_mean": 0.0009684900815045694, "clip_ratio/low_mean": 0.0009765402992343297, "clip_ratio/low_min": 3.16937112074811e-05, "clip_ratio/region_mean": 0.0019450303734629415, "epoch": 1.8352289297171187, "grad_norm": 0.2443126142024994, "learning_rate": 1e-06, "loss": -0.0374, "step": 786 }, { "clip_ratio/high_max": 0.0035525073762983084, "clip_ratio/high_mean": 0.0011505296388349961, "clip_ratio/low_mean": 0.0012322973689151695, "clip_ratio/low_min": 1.05645704024937e-05, "clip_ratio/region_mean": 0.0023828270568628795, "epoch": 1.8375619714202391, "grad_norm": 0.2040264904499054, "learning_rate": 1e-06, "loss": -0.0377, "step": 787 }, { "clip_ratio/high_max": 0.002937351026048418, "clip_ratio/high_mean": 0.001020425286696991, "clip_ratio/low_mean": 0.0013868755704606883, "clip_ratio/low_min": 3.16937112074811e-05, "clip_ratio/region_mean": 0.002407300882623531, "epoch": 1.8398950131233596, "grad_norm": 0.22036604583263397, "learning_rate": 1e-06, "loss": -0.0377, "step": 788 }, { "clip_ratio/high_max": 0.00202498081853264, "clip_ratio/high_mean": 0.0006558133309226832, "clip_ratio/low_mean": 0.0005988032216919237, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012546165671665221, "completions/clipped_ratio": 0.1908482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2963.0, "completions/mean_length": 1309.7935791015625, "completions/mean_terminated_length": 652.6331176757812, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 1.84222805482648, "grad_norm": 0.3399260342121124, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 117161519.0, "reward": 0.4754464626312256, "reward_std": 0.13177865743637085, "rewards/verify_math_reward/mean": 0.4754464328289032, "rewards/verify_math_reward/std": 0.4996756315231323, "step": 789 }, { "clip_ratio/high_max": 0.002574367510533193, "clip_ratio/high_mean": 0.0008615906726845424, "clip_ratio/low_mean": 0.0006940032471902668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001555593920784304, "epoch": 1.8445610965296004, "grad_norm": 0.22516852617263794, "learning_rate": 1e-06, "loss": -0.0446, "step": 790 }, { "clip_ratio/high_max": 0.0027376520592952147, "clip_ratio/high_mean": 0.0008576442141929874, "clip_ratio/low_mean": 0.0009341048044007039, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017917489458341151, "epoch": 1.8468941382327209, "grad_norm": 0.19522975385189056, "learning_rate": 1e-06, "loss": -0.0448, "step": 791 }, { "clip_ratio/high_max": 0.0026930035164696164, "clip_ratio/high_mean": 0.0008588131131546106, "clip_ratio/low_mean": 0.0011109570696135052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00196977020095801, "epoch": 1.8492271799358413, "grad_norm": 0.2082953304052353, "learning_rate": 1e-06, "loss": -0.0449, "step": 792 }, { "clip_ratio/high_max": 0.0021975159979774617, "clip_ratio/high_mean": 0.0010656295926310122, "clip_ratio/low_mean": 0.0006331684171527741, "clip_ratio/low_min": 2.886836045945529e-05, "clip_ratio/region_mean": 0.0016987979979603551, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 1188.33935546875, "completions/mean_terminated_length": 608.3641357421875, "completions/min_length": 203.0, "completions/min_terminated_length": 203.0, "epoch": 1.851560221638962, "grad_norm": 0.3365528881549835, "learning_rate": 1e-06, "loss": -0.0678, "num_tokens": 117696535.0, "reward": 0.5558035969734192, "reward_std": 0.19989821314811707, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 793 }, { "clip_ratio/high_max": 0.003083037110627629, "clip_ratio/high_mean": 0.0013537398081098218, "clip_ratio/low_mean": 0.0009749114942678716, "clip_ratio/low_min": 1.9500779671943747e-05, "clip_ratio/region_mean": 0.0023286512951017357, "epoch": 1.8538932633420822, "grad_norm": 0.32683202624320984, "learning_rate": 1e-06, "loss": -0.0681, "step": 794 }, { "clip_ratio/high_max": 0.0028669562161667272, "clip_ratio/high_mean": 0.0013689995339518646, "clip_ratio/low_mean": 0.0012241506410646252, "clip_ratio/low_min": 1.9500779671943747e-05, "clip_ratio/region_mean": 0.002593150180473458, "epoch": 1.8562263050452028, "grad_norm": 0.25984010100364685, "learning_rate": 1e-06, "loss": -0.0683, "step": 795 }, { "clip_ratio/high_max": 0.003130484779831022, "clip_ratio/high_mean": 0.0012664634305110667, "clip_ratio/low_mean": 0.0014365567985805683, "clip_ratio/low_min": 1.9500779671943747e-05, "clip_ratio/region_mean": 0.0027030202036257833, "epoch": 1.858559346748323, "grad_norm": 0.25461524724960327, "learning_rate": 1e-06, "loss": -0.0684, "step": 796 }, { "clip_ratio/high_max": 0.002819614404870663, "clip_ratio/high_mean": 0.0010433404531795532, "clip_ratio/low_mean": 0.000562692410312593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016060328634921461, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 1210.8660888671875, "completions/mean_terminated_length": 644.6248779296875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.8608923884514437, "grad_norm": 0.33243247866630554, "learning_rate": 1e-06, "loss": -0.0822, "num_tokens": 118270751.0, "reward": 0.5625, "reward_std": 0.18307287991046906, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 797 }, { "clip_ratio/high_max": 0.003020628049853258, "clip_ratio/high_mean": 0.0011996221910521854, "clip_ratio/low_mean": 0.0007609488329762826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001960570953087881, "epoch": 1.8632254301545639, "grad_norm": 0.2611001431941986, "learning_rate": 1e-06, "loss": -0.0824, "step": 798 }, { "clip_ratio/high_max": 0.0030056221075938083, "clip_ratio/high_mean": 0.0012223275007272605, "clip_ratio/low_mean": 0.0008960413233580766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021183688295423053, "epoch": 1.8655584718576845, "grad_norm": 0.22762948274612427, "learning_rate": 1e-06, "loss": -0.0826, "step": 799 }, { "clip_ratio/high_max": 0.0030369122760021128, "clip_ratio/high_mean": 0.0011457976415840676, "clip_ratio/low_mean": 0.0011843351348943543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023301327382796444, "epoch": 1.8678915135608047, "grad_norm": 0.22786091268062592, "learning_rate": 1e-06, "loss": -0.0827, "step": 800 }, { "clip_ratio/high_max": 0.002827204138156958, "clip_ratio/high_mean": 0.0011934108260902576, "clip_ratio/low_mean": 0.0006371737217705231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018305845296708867, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 1183.9476318359375, "completions/mean_terminated_length": 621.7003784179688, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 1.8702245552639254, "grad_norm": 0.3000062108039856, "learning_rate": 1e-06, "loss": -0.0767, "num_tokens": 118818264.0, "reward": 0.574776828289032, "reward_std": 0.1907682567834854, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 801 }, { "clip_ratio/high_max": 0.0032071580208139494, "clip_ratio/high_mean": 0.001341705417871708, "clip_ratio/low_mean": 0.0009606630992493592, "clip_ratio/low_min": 4.161653941991972e-05, "clip_ratio/region_mean": 0.002302368484379258, "epoch": 1.8725575969670458, "grad_norm": 0.2800460457801819, "learning_rate": 1e-06, "loss": -0.077, "step": 802 }, { "clip_ratio/high_max": 0.0035844488375005312, "clip_ratio/high_mean": 0.001533605121949222, "clip_ratio/low_mean": 0.0011396265435905661, "clip_ratio/low_min": 1.4282450138125569e-05, "clip_ratio/region_mean": 0.002673231625522021, "epoch": 1.8748906386701663, "grad_norm": 0.25125962495803833, "learning_rate": 1e-06, "loss": -0.0772, "step": 803 }, { "clip_ratio/high_max": 0.0033888213947648183, "clip_ratio/high_mean": 0.0013532129996747244, "clip_ratio/low_mean": 0.001306935651882668, "clip_ratio/low_min": 4.2847350414376706e-05, "clip_ratio/region_mean": 0.0026601486752042547, "epoch": 1.8772236803732867, "grad_norm": 0.2604830265045166, "learning_rate": 1e-06, "loss": -0.0773, "step": 804 }, { "clip_ratio/high_max": 0.002367687178775668, "clip_ratio/high_mean": 0.000912133504243684, "clip_ratio/low_mean": 0.0005989227101963479, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015110562089830637, "completions/clipped_ratio": 0.1741071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3643.0, "completions/mean_length": 1275.75341796875, "completions/mean_terminated_length": 681.2149047851562, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.8795567220764071, "grad_norm": 0.28715670108795166, "learning_rate": 1e-06, "loss": -0.0577, "num_tokens": 119402443.0, "reward": 0.5457589626312256, "reward_std": 0.16570734977722168, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981798231601715, "step": 805 }, { "clip_ratio/high_max": 0.0029728993104072288, "clip_ratio/high_mean": 0.001111834862967953, "clip_ratio/low_mean": 0.0007670903178222943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018789252208080143, "epoch": 1.8818897637795275, "grad_norm": 0.21860335767269135, "learning_rate": 1e-06, "loss": -0.058, "step": 806 }, { "clip_ratio/high_max": 0.0027008577017113566, "clip_ratio/high_mean": 0.0010888749711739365, "clip_ratio/low_mean": 0.0010045799008366885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002093454902933445, "epoch": 1.884222805482648, "grad_norm": 0.21858292818069458, "learning_rate": 1e-06, "loss": -0.058, "step": 807 }, { "clip_ratio/high_max": 0.0027396525474614464, "clip_ratio/high_mean": 0.0010619849563227035, "clip_ratio/low_mean": 0.0010708690479077632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002132853987859562, "epoch": 1.8865558471857684, "grad_norm": 0.20753753185272217, "learning_rate": 1e-06, "loss": -0.0581, "step": 808 }, { "clip_ratio/high_max": 0.002918562593549723, "clip_ratio/high_mean": 0.0010963252407236723, "clip_ratio/low_mean": 0.000893263903890329, "clip_ratio/low_min": 1.9797276763711125e-05, "clip_ratio/region_mean": 0.0019895891564374324, "completions/clipped_ratio": 0.2053571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3374.0, "completions/mean_length": 1344.7310791015625, "completions/mean_terminated_length": 633.7289428710938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.8888888888888888, "grad_norm": 0.34875476360321045, "learning_rate": 1e-06, "loss": -0.0851, "num_tokens": 119945450.0, "reward": 0.4888392984867096, "reward_std": 0.19268713891506195, "rewards/verify_math_reward/mean": 0.4888392984867096, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 809 }, { "clip_ratio/high_max": 0.0038939863807172514, "clip_ratio/high_mean": 0.0013524092973966617, "clip_ratio/low_mean": 0.0010970616106078523, "clip_ratio/low_min": 3.1151354050962254e-05, "clip_ratio/region_mean": 0.0024494708195561543, "epoch": 1.8912219305920095, "grad_norm": 0.3230593204498291, "learning_rate": 1e-06, "loss": -0.0852, "step": 810 }, { "clip_ratio/high_max": 0.0036844285641564056, "clip_ratio/high_mean": 0.0014590889877581503, "clip_ratio/low_mean": 0.0015696788232162362, "clip_ratio/low_min": 1.9797276763711125e-05, "clip_ratio/region_mean": 0.003028767809155397, "epoch": 1.8935549722951297, "grad_norm": 0.2665470540523529, "learning_rate": 1e-06, "loss": -0.0857, "step": 811 }, { "clip_ratio/high_max": 0.0036328440000943374, "clip_ratio/high_mean": 0.0013039033910899889, "clip_ratio/low_mean": 0.0017443379783799173, "clip_ratio/low_min": 2.9695913326577283e-05, "clip_ratio/region_mean": 0.003048241422220599, "epoch": 1.8958880139982504, "grad_norm": 0.25983041524887085, "learning_rate": 1e-06, "loss": -0.0857, "step": 812 }, { "clip_ratio/high_max": 0.0025541995419189334, "clip_ratio/high_mean": 0.0010109552877111128, "clip_ratio/low_mean": 0.0006645241810474545, "clip_ratio/low_min": 1.4460897546086926e-05, "clip_ratio/region_mean": 0.0016754794851294719, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 1209.9788818359375, "completions/mean_terminated_length": 675.5303955078125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 1.8982210557013706, "grad_norm": 0.2932416796684265, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 120553103.0, "reward": 0.5502232313156128, "reward_std": 0.15782341361045837, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 813 }, { "clip_ratio/high_max": 0.003172433156578336, "clip_ratio/high_mean": 0.0012148804053140339, "clip_ratio/low_mean": 0.0009071936674445169, "clip_ratio/low_min": 2.6595744202495553e-05, "clip_ratio/region_mean": 0.002122074060025625, "epoch": 1.9005540974044912, "grad_norm": 0.23612117767333984, "learning_rate": 1e-06, "loss": -0.0609, "step": 814 }, { "clip_ratio/high_max": 0.0029380835840129294, "clip_ratio/high_mean": 0.001169846629636595, "clip_ratio/low_mean": 0.0010059440137411002, "clip_ratio/low_min": 2.4418832254013978e-05, "clip_ratio/region_mean": 0.0021757906433776952, "epoch": 1.9028871391076114, "grad_norm": 0.2363535314798355, "learning_rate": 1e-06, "loss": -0.061, "step": 815 }, { "clip_ratio/high_max": 0.0032878915008041076, "clip_ratio/high_mean": 0.0012400051637087017, "clip_ratio/low_mean": 0.0012415621458785608, "clip_ratio/low_min": 2.8921795092173852e-05, "clip_ratio/region_mean": 0.0024815672513796017, "epoch": 1.905220180810732, "grad_norm": 0.22312773764133453, "learning_rate": 1e-06, "loss": -0.0612, "step": 816 }, { "clip_ratio/high_max": 0.0027170163157279603, "clip_ratio/high_mean": 0.0010297121934854658, "clip_ratio/low_mean": 0.0006148753263914841, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016445875189674553, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3367.0, "completions/mean_length": 1220.407470703125, "completions/mean_terminated_length": 665.19970703125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.9075532225138523, "grad_norm": 0.30903011560440063, "learning_rate": 1e-06, "loss": -0.078, "num_tokens": 121138484.0, "reward": 0.5401785969734192, "reward_std": 0.17543968558311462, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 817 }, { "clip_ratio/high_max": 0.0035944295013905503, "clip_ratio/high_mean": 0.0012570860781124793, "clip_ratio/low_mean": 0.0008134864974636002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002070572561933659, "epoch": 1.909886264216973, "grad_norm": 0.2809344232082367, "learning_rate": 1e-06, "loss": -0.0782, "step": 818 }, { "clip_ratio/high_max": 0.0034634525945875794, "clip_ratio/high_mean": 0.0012651153083425015, "clip_ratio/low_mean": 0.0009896510273392778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002254766382975504, "epoch": 1.9122193059200934, "grad_norm": 0.2349781095981598, "learning_rate": 1e-06, "loss": -0.0785, "step": 819 }, { "clip_ratio/high_max": 0.0031610069345333613, "clip_ratio/high_mean": 0.0012248718412593007, "clip_ratio/low_mean": 0.0012135470769862877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002438418916426599, "epoch": 1.9145523476232138, "grad_norm": 0.24953652918338776, "learning_rate": 1e-06, "loss": -0.0786, "step": 820 }, { "clip_ratio/high_max": 0.0028672609259956516, "clip_ratio/high_mean": 0.0011765829149226192, "clip_ratio/low_mean": 0.0007742787074676016, "clip_ratio/low_min": 1.4405900401470717e-05, "clip_ratio/region_mean": 0.0019508616242092103, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 1300.1976318359375, "completions/mean_terminated_length": 738.0388793945312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.9168853893263342, "grad_norm": 0.30159497261047363, "learning_rate": 1e-06, "loss": -0.0721, "num_tokens": 121768685.0, "reward": 0.5089285969734192, "reward_std": 0.21560588479042053, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 821 }, { "clip_ratio/high_max": 0.002927249559434131, "clip_ratio/high_mean": 0.0013193048471293878, "clip_ratio/low_mean": 0.0009674110351625131, "clip_ratio/low_min": 9.32696639210917e-06, "clip_ratio/region_mean": 0.00228671585500706, "epoch": 1.9192184310294547, "grad_norm": 0.2845073342323303, "learning_rate": 1e-06, "loss": -0.0723, "step": 822 }, { "clip_ratio/high_max": 0.0033219540782738477, "clip_ratio/high_mean": 0.0013041101992712356, "clip_ratio/low_mean": 0.0011976933037658455, "clip_ratio/low_min": 2.798089917632751e-05, "clip_ratio/region_mean": 0.002501803515770007, "epoch": 1.921551472732575, "grad_norm": 0.23670266568660736, "learning_rate": 1e-06, "loss": -0.0726, "step": 823 }, { "clip_ratio/high_max": 0.003028897750482429, "clip_ratio/high_mean": 0.0012568753845698666, "clip_ratio/low_mean": 0.001436265232769074, "clip_ratio/low_min": 2.798089917632751e-05, "clip_ratio/region_mean": 0.002693140646442771, "epoch": 1.9238845144356955, "grad_norm": 0.24771694839000702, "learning_rate": 1e-06, "loss": -0.0727, "step": 824 }, { "clip_ratio/high_max": 0.002573345904238522, "clip_ratio/high_mean": 0.0009996045755542582, "clip_ratio/low_mean": 0.0005916766449445277, "clip_ratio/low_min": 1.4654161532234866e-05, "clip_ratio/region_mean": 0.0015912812450551428, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3848.0, "completions/mean_length": 1320.219970703125, "completions/mean_terminated_length": 684.341552734375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.926217556138816, "grad_norm": 0.3412525951862335, "learning_rate": 1e-06, "loss": -0.0693, "num_tokens": 122361018.0, "reward": 0.5703125, "reward_std": 0.16457942128181458, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 825 }, { "clip_ratio/high_max": 0.0028175055631436408, "clip_ratio/high_mean": 0.0011196003197255777, "clip_ratio/low_mean": 0.0007795236651872983, "clip_ratio/low_min": 2.9308323064469732e-05, "clip_ratio/region_mean": 0.0018991239558090456, "epoch": 1.9285505978419364, "grad_norm": 0.3357429504394531, "learning_rate": 1e-06, "loss": -0.0695, "step": 826 }, { "clip_ratio/high_max": 0.002739876370469574, "clip_ratio/high_mean": 0.0010583981857053004, "clip_ratio/low_mean": 0.0008846381160765304, "clip_ratio/low_min": 3.643252784968354e-05, "clip_ratio/region_mean": 0.0019430363063293044, "epoch": 1.9308836395450568, "grad_norm": 0.22380100190639496, "learning_rate": 1e-06, "loss": -0.0697, "step": 827 }, { "clip_ratio/high_max": 0.002658689318195684, "clip_ratio/high_mean": 0.001039948532707058, "clip_ratio/low_mean": 0.001099562708986923, "clip_ratio/low_min": 7.147962605813518e-05, "clip_ratio/region_mean": 0.0021395112489699386, "epoch": 1.9332166812481772, "grad_norm": 0.22586019337177277, "learning_rate": 1e-06, "loss": -0.0698, "step": 828 }, { "clip_ratio/high_max": 0.002943986553873401, "clip_ratio/high_mean": 0.0011864731186506106, "clip_ratio/low_mean": 0.0005792369875052827, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001765710097970441, "completions/clipped_ratio": 0.1930803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 1342.927490234375, "completions/mean_terminated_length": 684.170166015625, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 1.935549722951298, "grad_norm": 0.3448341190814972, "learning_rate": 1e-06, "loss": -0.1174, "num_tokens": 122946665.0, "reward": 0.5301339626312256, "reward_std": 0.21578949689865112, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 829 }, { "clip_ratio/high_max": 0.003517591430863831, "clip_ratio/high_mean": 0.001492794162913924, "clip_ratio/low_mean": 0.0008221051957661984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002314899320481345, "epoch": 1.937882764654418, "grad_norm": 0.2709249258041382, "learning_rate": 1e-06, "loss": -0.1177, "step": 830 }, { "clip_ratio/high_max": 0.0034801322108251043, "clip_ratio/high_mean": 0.0015445070421264973, "clip_ratio/low_mean": 0.0010751401532616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026196471881121397, "epoch": 1.9402158063575388, "grad_norm": 0.22620545327663422, "learning_rate": 1e-06, "loss": -0.118, "step": 831 }, { "clip_ratio/high_max": 0.0033398918312741444, "clip_ratio/high_mean": 0.0014138497790554538, "clip_ratio/low_mean": 0.0012099658979423111, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026238156788167544, "epoch": 1.942548848060659, "grad_norm": 0.2483496516942978, "learning_rate": 1e-06, "loss": -0.118, "step": 832 }, { "clip_ratio/high_max": 0.0026773650461109355, "clip_ratio/high_mean": 0.0010678252874640748, "clip_ratio/low_mean": 0.00047232585620804457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015401511409436353, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3823.0, "completions/mean_length": 1023.07373046875, "completions/mean_terminated_length": 584.0841674804688, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.9448818897637796, "grad_norm": 0.35285845398902893, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 123489811.0, "reward": 0.6495535969734192, "reward_std": 0.18645039200782776, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 833 }, { "clip_ratio/high_max": 0.0034253023914061487, "clip_ratio/high_mean": 0.0013965224497951567, "clip_ratio/low_mean": 0.0007195341222541174, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021160565956961364, "epoch": 1.9472149314668998, "grad_norm": 0.2906281352043152, "learning_rate": 1e-06, "loss": -0.0498, "step": 834 }, { "clip_ratio/high_max": 0.0034584589520818554, "clip_ratio/high_mean": 0.001384539864375256, "clip_ratio/low_mean": 0.0009951573447324336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023796972673153505, "epoch": 1.9495479731700205, "grad_norm": 0.22655533254146576, "learning_rate": 1e-06, "loss": -0.05, "step": 835 }, { "clip_ratio/high_max": 0.002966634900076315, "clip_ratio/high_mean": 0.0012003569754597265, "clip_ratio/low_mean": 0.0012030265734210843, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00240338352159597, "epoch": 1.9518810148731407, "grad_norm": 0.25467953085899353, "learning_rate": 1e-06, "loss": -0.05, "step": 836 }, { "clip_ratio/high_max": 0.0023573459111503325, "clip_ratio/high_mean": 0.0008272162631328683, "clip_ratio/low_mean": 0.0009178584023175063, "clip_ratio/low_min": 0.0001526445712443092, "clip_ratio/region_mean": 0.0017450746599934064, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 1196.7410888671875, "completions/mean_terminated_length": 641.5637817382812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.9542140565762613, "grad_norm": 0.3911133110523224, "learning_rate": 1e-06, "loss": -0.0449, "num_tokens": 124054891.0, "reward": 0.6004464626312256, "reward_std": 0.1792774647474289, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 837 }, { "clip_ratio/high_max": 0.0034634961775736883, "clip_ratio/high_mean": 0.0011457822802185547, "clip_ratio/low_mean": 0.0012875593711214606, "clip_ratio/low_min": 9.394089011038886e-05, "clip_ratio/region_mean": 0.002433341673167888, "epoch": 1.9565470982793818, "grad_norm": 0.32581472396850586, "learning_rate": 1e-06, "loss": -0.0454, "step": 838 }, { "clip_ratio/high_max": 0.0031172664603218436, "clip_ratio/high_mean": 0.0010698406240408076, "clip_ratio/low_mean": 0.001597761751327198, "clip_ratio/low_min": 0.00014222434037947096, "clip_ratio/region_mean": 0.0026676024426706135, "epoch": 1.9588801399825022, "grad_norm": 0.2666594386100769, "learning_rate": 1e-06, "loss": -0.0456, "step": 839 }, { "clip_ratio/high_max": 0.0027453268921817653, "clip_ratio/high_mean": 0.0009260922197427135, "clip_ratio/low_mean": 0.002023709843342658, "clip_ratio/low_min": 0.00011088754854426952, "clip_ratio/region_mean": 0.0029498021976905875, "epoch": 1.9612131816856226, "grad_norm": 0.2504991292953491, "learning_rate": 1e-06, "loss": -0.0457, "step": 840 }, { "clip_ratio/high_max": 0.0022448038835136686, "clip_ratio/high_mean": 0.0008656120189698413, "clip_ratio/low_mean": 0.0005494064107551822, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014150184106256347, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3817.0, "completions/mean_length": 1129.015625, "completions/mean_terminated_length": 652.453369140625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 1.963546223388743, "grad_norm": 0.2795056998729706, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 124648505.0, "reward": 0.578125, "reward_std": 0.1662386953830719, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 841 }, { "clip_ratio/high_max": 0.0024054357418208383, "clip_ratio/high_mean": 0.0010389593844593037, "clip_ratio/low_mean": 0.000745671023196337, "clip_ratio/low_min": 1.4022884897713084e-05, "clip_ratio/region_mean": 0.0017846304181148298, "epoch": 1.9658792650918635, "grad_norm": 0.22816675901412964, "learning_rate": 1e-06, "loss": -0.0577, "step": 842 }, { "clip_ratio/high_max": 0.0024598441887064837, "clip_ratio/high_mean": 0.001023146014631493, "clip_ratio/low_mean": 0.0008784550973359728, "clip_ratio/low_min": 1.4022884897713084e-05, "clip_ratio/region_mean": 0.0019016010846826248, "epoch": 1.968212306794984, "grad_norm": 0.21649332344532013, "learning_rate": 1e-06, "loss": -0.0579, "step": 843 }, { "clip_ratio/high_max": 0.0024888858024496585, "clip_ratio/high_mean": 0.0009545433949824655, "clip_ratio/low_mean": 0.0010246585416098242, "clip_ratio/low_min": 1.4022884897713084e-05, "clip_ratio/region_mean": 0.0019792019520536996, "epoch": 1.9705453484981044, "grad_norm": 0.2124703973531723, "learning_rate": 1e-06, "loss": -0.058, "step": 844 }, { "clip_ratio/high_max": 0.002497881134331692, "clip_ratio/high_mean": 0.0009816796955419704, "clip_ratio/low_mean": 0.00041495259642942983, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013966322985652369, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3616.0, "completions/mean_length": 1204.2154541015625, "completions/mean_terminated_length": 604.03369140625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.9728783902012248, "grad_norm": 0.33216366171836853, "learning_rate": 1e-06, "loss": -0.1035, "num_tokens": 125181234.0, "reward": 0.5948660969734192, "reward_std": 0.16175968945026398, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 845 }, { "clip_ratio/high_max": 0.003344422126247082, "clip_ratio/high_mean": 0.001384327799314633, "clip_ratio/low_mean": 0.0007072462321957573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020915740460623056, "epoch": 1.9752114319043454, "grad_norm": 0.2383900135755539, "learning_rate": 1e-06, "loss": -0.1038, "step": 846 }, { "clip_ratio/high_max": 0.0031971252901712433, "clip_ratio/high_mean": 0.0012845905948779546, "clip_ratio/low_mean": 0.0008054647610151733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020900553645333275, "epoch": 1.9775444736074657, "grad_norm": 0.21354363858699799, "learning_rate": 1e-06, "loss": -0.104, "step": 847 }, { "clip_ratio/high_max": 0.003370238082425203, "clip_ratio/high_mean": 0.0012448482084437273, "clip_ratio/low_mean": 0.0009382779990119161, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002183126227464527, "epoch": 1.9798775153105863, "grad_norm": 0.2377062439918518, "learning_rate": 1e-06, "loss": -0.104, "step": 848 }, { "clip_ratio/high_max": 0.0023006051560514607, "clip_ratio/high_mean": 0.0007807049223629292, "clip_ratio/low_mean": 0.0006758223921679019, "clip_ratio/low_min": 1.125720427808119e-05, "clip_ratio/region_mean": 0.0014565272940672003, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 1125.243408203125, "completions/mean_terminated_length": 584.3931884765625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.9822105570137065, "grad_norm": 0.3098432719707489, "learning_rate": 1e-06, "loss": -0.0745, "num_tokens": 125716956.0, "reward": 0.5602678656578064, "reward_std": 0.1442854106426239, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 849 }, { "clip_ratio/high_max": 0.002714711728913244, "clip_ratio/high_mean": 0.0009240049912477843, "clip_ratio/low_mean": 0.0010237854571641947, "clip_ratio/low_min": 1.125720427808119e-05, "clip_ratio/region_mean": 0.001947790471604094, "epoch": 1.9845435987168272, "grad_norm": 0.3019099533557892, "learning_rate": 1e-06, "loss": -0.0748, "step": 850 }, { "clip_ratio/high_max": 0.0033190110116265714, "clip_ratio/high_mean": 0.0011018242767022457, "clip_ratio/low_mean": 0.0011715921486938896, "clip_ratio/low_min": 2.471332481945865e-05, "clip_ratio/region_mean": 0.0022734164231223986, "epoch": 1.9868766404199474, "grad_norm": 0.26513129472732544, "learning_rate": 1e-06, "loss": -0.075, "step": 851 }, { "clip_ratio/high_max": 0.0026627587794791907, "clip_ratio/high_mean": 0.000974896540355985, "clip_ratio/low_mean": 0.0013590649268735433, "clip_ratio/low_min": 1.125720427808119e-05, "clip_ratio/region_mean": 0.0023339615072472952, "epoch": 1.989209682123068, "grad_norm": 0.2309122234582901, "learning_rate": 1e-06, "loss": -0.075, "step": 852 }, { "clip_ratio/high_max": 0.0028166246265755035, "clip_ratio/high_mean": 0.0010274419109919108, "clip_ratio/low_mean": 0.0005627615837511257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015902034610917326, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3683.0, "completions/mean_length": 1177.8560791015625, "completions/mean_terminated_length": 619.0625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 1.9915427238261882, "grad_norm": 0.35362178087234497, "learning_rate": 1e-06, "loss": -0.0613, "num_tokens": 126266563.0, "reward": 0.606026828289032, "reward_std": 0.14508774876594543, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 853 }, { "clip_ratio/high_max": 0.0031189397268462926, "clip_ratio/high_mean": 0.0012358821859379532, "clip_ratio/low_mean": 0.0008159829521900974, "clip_ratio/low_min": 3.2946758437901735e-05, "clip_ratio/region_mean": 0.0020518651654128917, "epoch": 1.993875765529309, "grad_norm": 0.2607204020023346, "learning_rate": 1e-06, "loss": -0.0616, "step": 854 }, { "clip_ratio/high_max": 0.003127547497570049, "clip_ratio/high_mean": 0.001196386856463505, "clip_ratio/low_mean": 0.000987397224889719, "clip_ratio/low_min": 4.9498416046844795e-05, "clip_ratio/region_mean": 0.0021837840831722133, "epoch": 1.9962088072324293, "grad_norm": 0.23352815210819244, "learning_rate": 1e-06, "loss": -0.0618, "step": 855 }, { "clip_ratio/high_max": 0.003170761585352011, "clip_ratio/high_mean": 0.0011412914191168966, "clip_ratio/low_mean": 0.0012092185934307054, "clip_ratio/low_min": 4.9498416046844795e-05, "clip_ratio/region_mean": 0.0023505100107286125, "epoch": 1.9985418489355498, "grad_norm": 0.23547892272472382, "learning_rate": 1e-06, "loss": -0.0619, "step": 856 }, { "clip_ratio/high_max": 0.003047192654776154, "clip_ratio/high_mean": 0.0011949555737373885, "clip_ratio/low_mean": 0.0007931789368740283, "clip_ratio/low_min": 1.1322464160912205e-05, "clip_ratio/region_mean": 0.0019881345360772684, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 1159.671875, "completions/mean_terminated_length": 620.504638671875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 2.0023330417031207, "grad_norm": 0.4390386939048767, "learning_rate": 1e-06, "loss": -0.0591, "num_tokens": 126824773.0, "reward": 0.5714285969734192, "reward_std": 0.17874936759471893, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 857 }, { "clip_ratio/high_max": 0.003988200696767308, "clip_ratio/high_mean": 0.0014030033053131774, "clip_ratio/low_mean": 0.0011837988859042525, "clip_ratio/low_min": 5.661231989506632e-05, "clip_ratio/region_mean": 0.002586802133009769, "epoch": 2.004666083406241, "grad_norm": 0.2932032644748688, "learning_rate": 1e-06, "loss": -0.0593, "step": 858 }, { "clip_ratio/high_max": 0.0027904905364266597, "clip_ratio/high_mean": 0.0012420199327607406, "clip_ratio/low_mean": 0.0014423892898776103, "clip_ratio/low_min": 1.1322464160912205e-05, "clip_ratio/region_mean": 0.002684409155335743, "epoch": 2.0069991251093615, "grad_norm": 0.2708625793457031, "learning_rate": 1e-06, "loss": -0.0596, "step": 859 }, { "clip_ratio/high_max": 0.0039043327851686627, "clip_ratio/high_mean": 0.0014121150343271438, "clip_ratio/low_mean": 0.0015946616549626924, "clip_ratio/low_min": 4.528985664364882e-05, "clip_ratio/region_mean": 0.0030067766856518574, "epoch": 2.0093321668124817, "grad_norm": 0.2647579312324524, "learning_rate": 1e-06, "loss": -0.0597, "step": 860 }, { "clip_ratio/high_max": 0.002119186057825573, "clip_ratio/high_mean": 0.0007913218196335947, "clip_ratio/low_mean": 0.0004452995322026254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012366213377390523, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2324.0, "completions/mean_length": 1149.997802734375, "completions/mean_terminated_length": 627.3823852539062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.0116652085156024, "grad_norm": 0.3910108208656311, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 127391891.0, "reward": 0.590401828289032, "reward_std": 0.14556488394737244, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 861 }, { "clip_ratio/high_max": 0.0025455272843828425, "clip_ratio/high_mean": 0.0009815586654440267, "clip_ratio/low_mean": 0.0006933992754056817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016749579808674753, "epoch": 2.0139982502187226, "grad_norm": 0.19863538444042206, "learning_rate": 1e-06, "loss": -0.0551, "step": 862 }, { "clip_ratio/high_max": 0.0025782149605220184, "clip_ratio/high_mean": 0.000941302307182923, "clip_ratio/low_mean": 0.000803361215730547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001744663510180544, "epoch": 2.0163312919218432, "grad_norm": 0.2755461037158966, "learning_rate": 1e-06, "loss": -0.0552, "step": 863 }, { "clip_ratio/high_max": 0.002186407222325215, "clip_ratio/high_mean": 0.0008494528065057239, "clip_ratio/low_mean": 0.000943040229685721, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017924930216395296, "epoch": 2.0186643336249634, "grad_norm": 0.2356482893228531, "learning_rate": 1e-06, "loss": -0.0552, "step": 864 }, { "clip_ratio/high_max": 0.0025537680339766666, "clip_ratio/high_mean": 0.0010160112324228976, "clip_ratio/low_mean": 0.0005593823034359957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015753935804241337, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2578.0, "completions/mean_length": 1229.4866943359375, "completions/mean_terminated_length": 634.5498657226562, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 2.020997375328084, "grad_norm": 0.3333074152469635, "learning_rate": 1e-06, "loss": -0.0824, "num_tokens": 127956423.0, "reward": 0.5524553656578064, "reward_std": 0.16424313187599182, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 865 }, { "clip_ratio/high_max": 0.002955421070510056, "clip_ratio/high_mean": 0.0011224296758882701, "clip_ratio/low_mean": 0.0008748107065912336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001997240397031419, "epoch": 2.0233304170312043, "grad_norm": 0.25484535098075867, "learning_rate": 1e-06, "loss": -0.0827, "step": 866 }, { "clip_ratio/high_max": 0.0030243089931900613, "clip_ratio/high_mean": 0.0012547181722766254, "clip_ratio/low_mean": 0.0010132673451153096, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022679855319438502, "epoch": 2.025663458734325, "grad_norm": 0.282296746969223, "learning_rate": 1e-06, "loss": -0.0829, "step": 867 }, { "clip_ratio/high_max": 0.002858953630493488, "clip_ratio/high_mean": 0.0011380158939573448, "clip_ratio/low_mean": 0.001099641729524592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022376576234819368, "epoch": 2.027996500437445, "grad_norm": 0.22792915999889374, "learning_rate": 1e-06, "loss": -0.083, "step": 868 }, { "clip_ratio/high_max": 0.0023803658405086026, "clip_ratio/high_mean": 0.0008531928397133015, "clip_ratio/low_mean": 0.0007015458722889889, "clip_ratio/low_min": 2.8506270609796047e-05, "clip_ratio/region_mean": 0.0015547387047263328, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3985.0, "completions/mean_length": 1315.3851318359375, "completions/mean_terminated_length": 742.79541015625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 2.030329542140566, "grad_norm": 0.3597649931907654, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 128599632.0, "reward": 0.5055803656578064, "reward_std": 0.1872745305299759, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 869 }, { "clip_ratio/high_max": 0.0026170422934228554, "clip_ratio/high_mean": 0.000998484163574176, "clip_ratio/low_mean": 0.000972096813711687, "clip_ratio/low_min": 3.850102802971378e-05, "clip_ratio/region_mean": 0.0019705809900187887, "epoch": 2.032662583843686, "grad_norm": 0.2861005365848541, "learning_rate": 1e-06, "loss": -0.0662, "step": 870 }, { "clip_ratio/high_max": 0.003180027582857292, "clip_ratio/high_mean": 0.0011111904823337682, "clip_ratio/low_mean": 0.0011452123708295403, "clip_ratio/low_min": 3.850102802971378e-05, "clip_ratio/region_mean": 0.0022564028768101707, "epoch": 2.0349956255468067, "grad_norm": 0.2486860156059265, "learning_rate": 1e-06, "loss": -0.0664, "step": 871 }, { "clip_ratio/high_max": 0.00269731388107175, "clip_ratio/high_mean": 0.001022971280690399, "clip_ratio/low_mean": 0.0013633073940582108, "clip_ratio/low_min": 2.5667352019809186e-05, "clip_ratio/region_mean": 0.0023862786110839806, "epoch": 2.037328667249927, "grad_norm": 0.36107203364372253, "learning_rate": 1e-06, "loss": -0.0665, "step": 872 }, { "clip_ratio/high_max": 0.0027890367164218333, "clip_ratio/high_mean": 0.001022377216941095, "clip_ratio/low_mean": 0.000612402500337339, "clip_ratio/low_min": 1.7193948224303313e-05, "clip_ratio/region_mean": 0.001634779735468328, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3322.0, "completions/mean_length": 1023.1563110351562, "completions/mean_terminated_length": 588.6522216796875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 2.0396617089530475, "grad_norm": 0.338905930519104, "learning_rate": 1e-06, "loss": -0.0368, "num_tokens": 129149068.0, "reward": 0.6517857313156128, "reward_std": 0.17029374837875366, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 873 }, { "clip_ratio/high_max": 0.003292863482784014, "clip_ratio/high_mean": 0.0011018429668183671, "clip_ratio/low_mean": 0.0008577700646128505, "clip_ratio/low_min": 1.691932811809238e-05, "clip_ratio/region_mean": 0.001959613015060313, "epoch": 2.041994750656168, "grad_norm": 0.2677460312843323, "learning_rate": 1e-06, "loss": -0.037, "step": 874 }, { "clip_ratio/high_max": 0.0032929509761743248, "clip_ratio/high_mean": 0.001230530640896177, "clip_ratio/low_mean": 0.0010246034125884762, "clip_ratio/low_min": 5.4959549743216485e-05, "clip_ratio/region_mean": 0.0022551340371137485, "epoch": 2.0443277923592884, "grad_norm": 0.23969261348247528, "learning_rate": 1e-06, "loss": -0.0372, "step": 875 }, { "clip_ratio/high_max": 0.0036424016871023923, "clip_ratio/high_mean": 0.0012846665631514043, "clip_ratio/low_mean": 0.0012816219095839188, "clip_ratio/low_min": 6.595146260224283e-05, "clip_ratio/region_mean": 0.002566288509115111, "epoch": 2.046660834062409, "grad_norm": 0.22869691252708435, "learning_rate": 1e-06, "loss": -0.0373, "step": 876 }, { "clip_ratio/high_max": 0.0020634450411307625, "clip_ratio/high_mean": 0.0008783011871855706, "clip_ratio/low_mean": 0.0007541520153608872, "clip_ratio/low_min": 1.429551684850594e-05, "clip_ratio/region_mean": 0.001632453189813532, "completions/clipped_ratio": 0.2142857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2833.0, "completions/mean_length": 1407.37841796875, "completions/mean_terminated_length": 674.117919921875, "completions/min_length": 206.0, "completions/min_terminated_length": 206.0, "epoch": 2.0489938757655293, "grad_norm": 0.4166792631149292, "learning_rate": 1e-06, "loss": -0.0569, "num_tokens": 129718183.0, "reward": 0.5189732313156128, "reward_std": 0.18329225480556488, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 877 }, { "clip_ratio/high_max": 0.002453501168929506, "clip_ratio/high_mean": 0.0010702426134230336, "clip_ratio/low_mean": 0.0010067290804727236, "clip_ratio/low_min": 1.429551684850594e-05, "clip_ratio/region_mean": 0.0020769717302755453, "epoch": 2.05132691746865, "grad_norm": 0.24961958825588226, "learning_rate": 1e-06, "loss": -0.0571, "step": 878 }, { "clip_ratio/high_max": 0.002625891262141522, "clip_ratio/high_mean": 0.0011261152212682646, "clip_ratio/low_mean": 0.001208353744004853, "clip_ratio/low_min": 6.284304254222661e-05, "clip_ratio/region_mean": 0.0023344689834630117, "epoch": 2.05365995917177, "grad_norm": 0.23191159963607788, "learning_rate": 1e-06, "loss": -0.0573, "step": 879 }, { "clip_ratio/high_max": 0.002479023845808115, "clip_ratio/high_mean": 0.0010229359704680974, "clip_ratio/low_mean": 0.0014079654647503048, "clip_ratio/low_min": 4.4445929233916104e-05, "clip_ratio/region_mean": 0.002430901469779201, "epoch": 2.055993000874891, "grad_norm": 0.21616202592849731, "learning_rate": 1e-06, "loss": -0.0574, "step": 880 }, { "clip_ratio/high_max": 0.002158216477255337, "clip_ratio/high_mean": 0.0007028610289125936, "clip_ratio/low_mean": 0.000718300512744463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014211615598469507, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3887.0, "completions/mean_length": 1123.07373046875, "completions/mean_terminated_length": 600.2755737304688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 2.058326042578011, "grad_norm": 0.2821815311908722, "learning_rate": 1e-06, "loss": -0.0429, "num_tokens": 130263505.0, "reward": 0.5837053656578064, "reward_std": 0.1345210075378418, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 881 }, { "clip_ratio/high_max": 0.0029855417924409267, "clip_ratio/high_mean": 0.0009691047216620063, "clip_ratio/low_mean": 0.0008622468667454086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018313515683985315, "epoch": 2.0606590842811316, "grad_norm": 0.2728791832923889, "learning_rate": 1e-06, "loss": -0.0432, "step": 882 }, { "clip_ratio/high_max": 0.002525583593524061, "clip_ratio/high_mean": 0.0008816239460429642, "clip_ratio/low_mean": 0.0010311347541573923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019127586492686532, "epoch": 2.062992125984252, "grad_norm": 0.25536054372787476, "learning_rate": 1e-06, "loss": -0.0433, "step": 883 }, { "clip_ratio/high_max": 0.0026348084647906944, "clip_ratio/high_mean": 0.0008546148319510394, "clip_ratio/low_mean": 0.001253188143891748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002107803011313081, "epoch": 2.0653251676873725, "grad_norm": 0.21684107184410095, "learning_rate": 1e-06, "loss": -0.0434, "step": 884 }, { "clip_ratio/high_max": 0.002526842705265153, "clip_ratio/high_mean": 0.0008572160531912232, "clip_ratio/low_mean": 0.00048245869129459606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013396747344813775, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3284.0, "completions/mean_length": 1280.11279296875, "completions/mean_terminated_length": 700.259765625, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.0676582093904927, "grad_norm": 0.23400527238845825, "learning_rate": 1e-06, "loss": -0.0832, "num_tokens": 130875446.0, "reward": 0.5580357313156128, "reward_std": 0.16235631704330444, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 885 }, { "clip_ratio/high_max": 0.002830396333592944, "clip_ratio/high_mean": 0.001031815798341995, "clip_ratio/low_mean": 0.0005952802539468394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016270960513793398, "epoch": 2.0699912510936134, "grad_norm": 0.209345743060112, "learning_rate": 1e-06, "loss": -0.0833, "step": 886 }, { "clip_ratio/high_max": 0.003029807143320795, "clip_ratio/high_mean": 0.0010907083978963783, "clip_ratio/low_mean": 0.0007710019472142449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001861710330558708, "epoch": 2.0723242927967336, "grad_norm": 0.26701340079307556, "learning_rate": 1e-06, "loss": -0.0834, "step": 887 }, { "clip_ratio/high_max": 0.0031339623310486786, "clip_ratio/high_mean": 0.0010413353120384272, "clip_ratio/low_mean": 0.0008892381974874297, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001930573518620804, "epoch": 2.0746573344998542, "grad_norm": 0.18892860412597656, "learning_rate": 1e-06, "loss": -0.0835, "step": 888 }, { "clip_ratio/high_max": 0.002224198036856251, "clip_ratio/high_mean": 0.0007760922162560746, "clip_ratio/low_mean": 0.0005144084084349743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012905006024084287, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 1177.1707763671875, "completions/mean_terminated_length": 636.6467895507812, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.0769903762029744, "grad_norm": 0.3154468238353729, "learning_rate": 1e-06, "loss": -0.0547, "num_tokens": 131457871.0, "reward": 0.6071428656578064, "reward_std": 0.13139888644218445, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 889 }, { "clip_ratio/high_max": 0.002584983390988782, "clip_ratio/high_mean": 0.0009189535740006249, "clip_ratio/low_mean": 0.0007904853746367735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017094390022975858, "epoch": 2.079323417906095, "grad_norm": 0.23959881067276, "learning_rate": 1e-06, "loss": -0.0549, "step": 890 }, { "clip_ratio/high_max": 0.00268344135110965, "clip_ratio/high_mean": 0.0008809746614133473, "clip_ratio/low_mean": 0.0009276802393287653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018086548880091868, "epoch": 2.0816564596092153, "grad_norm": 0.20634399354457855, "learning_rate": 1e-06, "loss": -0.055, "step": 891 }, { "clip_ratio/high_max": 0.002773980326310266, "clip_ratio/high_mean": 0.0009158890570688527, "clip_ratio/low_mean": 0.0010924457674263977, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002008334849961102, "epoch": 2.083989501312336, "grad_norm": 0.1906619518995285, "learning_rate": 1e-06, "loss": -0.0551, "step": 892 }, { "clip_ratio/high_max": 0.0020683441616711207, "clip_ratio/high_mean": 0.0007411143869830994, "clip_ratio/low_mean": 0.0006184447379382618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013595591444754973, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 1174.2410888671875, "completions/mean_terminated_length": 651.4000244140625, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 2.0863225430154566, "grad_norm": 0.3441523015499115, "learning_rate": 1e-06, "loss": -0.0756, "num_tokens": 132038639.0, "reward": 0.6194196939468384, "reward_std": 0.13444431126117706, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 893 }, { "clip_ratio/high_max": 0.002379316763835959, "clip_ratio/high_mean": 0.0009014450861286605, "clip_ratio/low_mean": 0.0008722010829842475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017736461850290652, "epoch": 2.088655584718577, "grad_norm": 0.2408902794122696, "learning_rate": 1e-06, "loss": -0.0758, "step": 894 }, { "clip_ratio/high_max": 0.0024297764684888534, "clip_ratio/high_mean": 0.0009308865046477877, "clip_ratio/low_mean": 0.0010580024304545077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001988888980122283, "epoch": 2.0909886264216975, "grad_norm": 0.2293797731399536, "learning_rate": 1e-06, "loss": -0.076, "step": 895 }, { "clip_ratio/high_max": 0.002157798364351038, "clip_ratio/high_mean": 0.0008320046144945081, "clip_ratio/low_mean": 0.0011370203883416252, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001969025001017144, "epoch": 2.0933216681248177, "grad_norm": 0.1965988725423813, "learning_rate": 1e-06, "loss": -0.0761, "step": 896 }, { "clip_ratio/high_max": 0.0025361771185998805, "clip_ratio/high_mean": 0.000843985199026065, "clip_ratio/low_mean": 0.0005831091912114061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014270944047893863, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 1246.141845703125, "completions/mean_terminated_length": 682.2660522460938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.0956547098279383, "grad_norm": 0.29047125577926636, "learning_rate": 1e-06, "loss": -0.0672, "num_tokens": 132647382.0, "reward": 0.5290178656578064, "reward_std": 0.15702247619628906, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 897 }, { "clip_ratio/high_max": 0.003130072793283034, "clip_ratio/high_mean": 0.0011506956107041333, "clip_ratio/low_mean": 0.0007367316311501781, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018874272573157214, "epoch": 2.0979877515310585, "grad_norm": 0.27660995721817017, "learning_rate": 1e-06, "loss": -0.0674, "step": 898 }, { "clip_ratio/high_max": 0.0028732167556881905, "clip_ratio/high_mean": 0.0011284256470389664, "clip_ratio/low_mean": 0.000960379215030116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020888048675260507, "epoch": 2.100320793234179, "grad_norm": 0.20674245059490204, "learning_rate": 1e-06, "loss": -0.0676, "step": 899 }, { "clip_ratio/high_max": 0.0031517503448412754, "clip_ratio/high_mean": 0.0010357327191741206, "clip_ratio/low_mean": 0.0009695559638203122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00200528866116656, "epoch": 2.1026538349372994, "grad_norm": 0.2387486696243286, "learning_rate": 1e-06, "loss": -0.0676, "step": 900 }, { "clip_ratio/high_max": 0.0027165291612618603, "clip_ratio/high_mean": 0.0008888476822903613, "clip_ratio/low_mean": 0.0005623961233141017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014512437846860848, "completions/clipped_ratio": 0.1886160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3551.0, "completions/mean_length": 1303.485595703125, "completions/mean_terminated_length": 654.3314819335938, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 2.10498687664042, "grad_norm": 0.33062219619750977, "learning_rate": 1e-06, "loss": -0.0628, "num_tokens": 133214929.0, "reward": 0.5524553656578064, "reward_std": 0.15578144788742065, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 901 }, { "clip_ratio/high_max": 0.00301307208428625, "clip_ratio/high_mean": 0.0011444835472502746, "clip_ratio/low_mean": 0.0009213571383952512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020658407011069357, "epoch": 2.1073199183435403, "grad_norm": 0.2832688093185425, "learning_rate": 1e-06, "loss": -0.0631, "step": 902 }, { "clip_ratio/high_max": 0.0031409784132847562, "clip_ratio/high_mean": 0.0011654591144178994, "clip_ratio/low_mean": 0.0010380928288213909, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022035519432392903, "epoch": 2.109652960046661, "grad_norm": 0.2659349739551544, "learning_rate": 1e-06, "loss": -0.0633, "step": 903 }, { "clip_ratio/high_max": 0.003178455473971553, "clip_ratio/high_mean": 0.001116634884965606, "clip_ratio/low_mean": 0.0013046228841631091, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024212577409343794, "epoch": 2.111986001749781, "grad_norm": 0.23541253805160522, "learning_rate": 1e-06, "loss": -0.0633, "step": 904 }, { "clip_ratio/high_max": 0.002693645998078864, "clip_ratio/high_mean": 0.001170849511254346, "clip_ratio/low_mean": 0.000556333008717047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017271825417992659, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2707.0, "completions/mean_length": 1236.67529296875, "completions/mean_terminated_length": 600.8363037109375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.114319043452902, "grad_norm": 0.6479451656341553, "learning_rate": 1e-06, "loss": -0.1152, "num_tokens": 133738094.0, "reward": 0.590401828289032, "reward_std": 0.18558135628700256, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 905 }, { "clip_ratio/high_max": 0.003487138521450106, "clip_ratio/high_mean": 0.0013652735142386518, "clip_ratio/low_mean": 0.0008751920686336234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022404655610444024, "epoch": 2.116652085156022, "grad_norm": 0.3784768581390381, "learning_rate": 1e-06, "loss": -0.1155, "step": 906 }, { "clip_ratio/high_max": 0.0031059396715136245, "clip_ratio/high_mean": 0.0013185599891585298, "clip_ratio/low_mean": 0.0009855562639131676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023041162858135067, "epoch": 2.1189851268591426, "grad_norm": 0.37493959069252014, "learning_rate": 1e-06, "loss": -0.1157, "step": 907 }, { "clip_ratio/high_max": 0.0032188539116759785, "clip_ratio/high_mean": 0.0013216879997344222, "clip_ratio/low_mean": 0.0012801653465430718, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026018533026217483, "epoch": 2.121318168562263, "grad_norm": 0.23647190630435944, "learning_rate": 1e-06, "loss": -0.1159, "step": 908 }, { "clip_ratio/high_max": 0.00239591280114837, "clip_ratio/high_mean": 0.000833051448353217, "clip_ratio/low_mean": 0.0005077994796920393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001340850900305668, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2666.0, "completions/mean_length": 1196.2957763671875, "completions/mean_terminated_length": 627.1949462890625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.1236512102653835, "grad_norm": 0.31013181805610657, "learning_rate": 1e-06, "loss": -0.0991, "num_tokens": 134296271.0, "reward": 0.609375, "reward_std": 0.15323200821876526, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 909 }, { "clip_ratio/high_max": 0.003133546299068257, "clip_ratio/high_mean": 0.0010264157244819216, "clip_ratio/low_mean": 0.000651560700475784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001677976415521698, "epoch": 2.1259842519685037, "grad_norm": 0.2695416808128357, "learning_rate": 1e-06, "loss": -0.0993, "step": 910 }, { "clip_ratio/high_max": 0.0030468583208858036, "clip_ratio/high_mean": 0.001055873934092233, "clip_ratio/low_mean": 0.0008514502305843052, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019073241783189587, "epoch": 2.1283172936716244, "grad_norm": 0.22806468605995178, "learning_rate": 1e-06, "loss": -0.0995, "step": 911 }, { "clip_ratio/high_max": 0.0028933032081113197, "clip_ratio/high_mean": 0.0009494455625826959, "clip_ratio/low_mean": 0.001000595917503233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001950041449163109, "epoch": 2.130650335374745, "grad_norm": 0.21979136765003204, "learning_rate": 1e-06, "loss": -0.0996, "step": 912 }, { "clip_ratio/high_max": 0.003457512670138385, "clip_ratio/high_mean": 0.0012429355201675207, "clip_ratio/low_mean": 0.0006668744454145781, "clip_ratio/low_min": 2.9719449230469763e-05, "clip_ratio/region_mean": 0.001909809943754226, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3929.0, "completions/mean_length": 1314.5301513671875, "completions/mean_terminated_length": 709.86279296875, "completions/min_length": 189.0, "completions/min_terminated_length": 189.0, "epoch": 2.1329833770778652, "grad_norm": 0.4042540192604065, "learning_rate": 1e-06, "loss": -0.054, "num_tokens": 134913666.0, "reward": 0.5290178656578064, "reward_std": 0.18719714879989624, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943605065345764, "step": 913 }, { "clip_ratio/high_max": 0.003955599411710864, "clip_ratio/high_mean": 0.0015156144054344622, "clip_ratio/low_mean": 0.0010538833803366288, "clip_ratio/low_min": 7.302063113456825e-05, "clip_ratio/region_mean": 0.002569497715739999, "epoch": 2.135316418780986, "grad_norm": 0.339517742395401, "learning_rate": 1e-06, "loss": -0.0543, "step": 914 }, { "clip_ratio/high_max": 0.0038658706289425027, "clip_ratio/high_mean": 0.001414357790054055, "clip_ratio/low_mean": 0.001270181384825264, "clip_ratio/low_min": 8.306038125738269e-05, "clip_ratio/region_mean": 0.002684539220354054, "epoch": 2.137649460484106, "grad_norm": 0.300604909658432, "learning_rate": 1e-06, "loss": -0.0545, "step": 915 }, { "clip_ratio/high_max": 0.003662280912976712, "clip_ratio/high_mean": 0.001388209870128776, "clip_ratio/low_mean": 0.0015070848130562808, "clip_ratio/low_min": 0.00012288177094887942, "clip_ratio/region_mean": 0.0028952946086064912, "epoch": 2.1399825021872267, "grad_norm": 0.24068570137023926, "learning_rate": 1e-06, "loss": -0.0546, "step": 916 }, { "clip_ratio/high_max": 0.0025139247190963943, "clip_ratio/high_mean": 0.0007882582540332805, "clip_ratio/low_mean": 0.00047680701118224533, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012650652824959252, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3895.0, "completions/mean_length": 1088.91748046875, "completions/mean_terminated_length": 619.424560546875, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 2.142315543890347, "grad_norm": 0.2798072099685669, "learning_rate": 1e-06, "loss": -0.0857, "num_tokens": 135479312.0, "reward": 0.6082589626312256, "reward_std": 0.14255832135677338, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841193318367004, "step": 917 }, { "clip_ratio/high_max": 0.0024859030891093425, "clip_ratio/high_mean": 0.0008973686544777593, "clip_ratio/low_mean": 0.0007205899273685645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016179585763893556, "epoch": 2.1446485855934676, "grad_norm": 0.2337394803762436, "learning_rate": 1e-06, "loss": -0.0859, "step": 918 }, { "clip_ratio/high_max": 0.0025061886408366263, "clip_ratio/high_mean": 0.0008488409621350002, "clip_ratio/low_mean": 0.0008249815755334566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016738224949222058, "epoch": 2.146981627296588, "grad_norm": 0.248878613114357, "learning_rate": 1e-06, "loss": -0.086, "step": 919 }, { "clip_ratio/high_max": 0.002762799762422219, "clip_ratio/high_mean": 0.0009419840571354143, "clip_ratio/low_mean": 0.0009382011185152805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018801851401804015, "epoch": 2.1493146689997085, "grad_norm": 0.22181767225265503, "learning_rate": 1e-06, "loss": -0.086, "step": 920 }, { "clip_ratio/high_max": 0.002830226774676703, "clip_ratio/high_mean": 0.0008858105793478899, "clip_ratio/low_mean": 0.0006732423416906386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001559052907396108, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 1169.8504638671875, "completions/mean_terminated_length": 627.9708862304688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.1516477107028287, "grad_norm": 0.36873382329940796, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 136043794.0, "reward": 0.551339328289032, "reward_std": 0.14327649772167206, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 921 }, { "clip_ratio/high_max": 0.0030737649358343333, "clip_ratio/high_mean": 0.0010548260834184475, "clip_ratio/low_mean": 0.0008977624897852365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019525885509210639, "epoch": 2.1539807524059493, "grad_norm": 0.31384843587875366, "learning_rate": 1e-06, "loss": -0.0432, "step": 922 }, { "clip_ratio/high_max": 0.002901628045947291, "clip_ratio/high_mean": 0.0010035861105279764, "clip_ratio/low_mean": 0.0010312153040104022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002034801429545041, "epoch": 2.1563137941090695, "grad_norm": 0.287413090467453, "learning_rate": 1e-06, "loss": -0.0433, "step": 923 }, { "clip_ratio/high_max": 0.0030378203809959814, "clip_ratio/high_mean": 0.0010518749622860923, "clip_ratio/low_mean": 0.001193243349007389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002245118303108029, "epoch": 2.15864683581219, "grad_norm": 0.2901656925678253, "learning_rate": 1e-06, "loss": -0.0434, "step": 924 }, { "clip_ratio/high_max": 0.0021673360315617174, "clip_ratio/high_mean": 0.0007874278126109857, "clip_ratio/low_mean": 0.00045988218789716484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012473100287024863, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 1261.01904296875, "completions/mean_terminated_length": 695.5408325195312, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.1609798775153104, "grad_norm": 0.24593819677829742, "learning_rate": 1e-06, "loss": -0.0537, "num_tokens": 136653739.0, "reward": 0.5390625, "reward_std": 0.13880637288093567, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 925 }, { "clip_ratio/high_max": 0.0024482507360517047, "clip_ratio/high_mean": 0.0008538253787264694, "clip_ratio/low_mean": 0.0006132278729182872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014670532691525295, "epoch": 2.163312919218431, "grad_norm": 0.2377663403749466, "learning_rate": 1e-06, "loss": -0.0538, "step": 926 }, { "clip_ratio/high_max": 0.0023770478946971707, "clip_ratio/high_mean": 0.0008966462391981622, "clip_ratio/low_mean": 0.0008122432655000011, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001708889496512711, "epoch": 2.1656459609215517, "grad_norm": 0.27022865414619446, "learning_rate": 1e-06, "loss": -0.054, "step": 927 }, { "clip_ratio/high_max": 0.002444057034153957, "clip_ratio/high_mean": 0.0008821404062473448, "clip_ratio/low_mean": 0.000945021546613134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00182716193376109, "epoch": 2.167979002624672, "grad_norm": 0.25843802094459534, "learning_rate": 1e-06, "loss": -0.0541, "step": 928 }, { "clip_ratio/high_max": 0.0021551562094828114, "clip_ratio/high_mean": 0.0008665754467074294, "clip_ratio/low_mean": 0.0007805696077411994, "clip_ratio/low_min": 6.278415912674973e-05, "clip_ratio/region_mean": 0.0016471450726385228, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 1148.51904296875, "completions/mean_terminated_length": 688.3316650390625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 2.1703120443277926, "grad_norm": 0.3312126696109772, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 137278460.0, "reward": 0.5167410969734192, "reward_std": 0.17908243834972382, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 929 }, { "clip_ratio/high_max": 0.0025835675405687653, "clip_ratio/high_mean": 0.001081595979485428, "clip_ratio/low_mean": 0.0010114546475961106, "clip_ratio/low_min": 0.00011477376574475784, "clip_ratio/region_mean": 0.002093050668918295, "epoch": 2.1726450860309128, "grad_norm": 0.2656216621398926, "learning_rate": 1e-06, "loss": -0.0381, "step": 930 }, { "clip_ratio/high_max": 0.00256100272963522, "clip_ratio/high_mean": 0.0010460949542903109, "clip_ratio/low_mean": 0.0011754417610063683, "clip_ratio/low_min": 0.00011822393389593344, "clip_ratio/region_mean": 0.0022215367353055626, "epoch": 2.1749781277340334, "grad_norm": 0.2628580927848816, "learning_rate": 1e-06, "loss": -0.0383, "step": 931 }, { "clip_ratio/high_max": 0.0022051738706068136, "clip_ratio/high_mean": 0.000955240908297128, "clip_ratio/low_mean": 0.0014340925736178178, "clip_ratio/low_min": 0.0001505113068560604, "clip_ratio/region_mean": 0.0023893334437161684, "epoch": 2.1773111694371536, "grad_norm": 0.26694920659065247, "learning_rate": 1e-06, "loss": -0.0384, "step": 932 }, { "clip_ratio/high_max": 0.002139591691957321, "clip_ratio/high_mean": 0.0007941814437799621, "clip_ratio/low_mean": 0.0005919145351072075, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013860959879821166, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3572.0, "completions/mean_length": 1079.03125, "completions/mean_terminated_length": 634.7913208007812, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.1796442111402743, "grad_norm": 0.3061717748641968, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 137881496.0, "reward": 0.5089285969734192, "reward_std": 0.16006723046302795, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 933 }, { "clip_ratio/high_max": 0.0023438950229319744, "clip_ratio/high_mean": 0.000926869637623895, "clip_ratio/low_mean": 0.0009254118449462112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018522815080359578, "epoch": 2.1819772528433945, "grad_norm": 0.24494388699531555, "learning_rate": 1e-06, "loss": -0.0442, "step": 934 }, { "clip_ratio/high_max": 0.0024403161805821583, "clip_ratio/high_mean": 0.0009846319130701886, "clip_ratio/low_mean": 0.0010780941793200327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002062726103758905, "epoch": 2.184310294546515, "grad_norm": 0.21183714270591736, "learning_rate": 1e-06, "loss": -0.0444, "step": 935 }, { "clip_ratio/high_max": 0.0020901129355479497, "clip_ratio/high_mean": 0.0008741297460801434, "clip_ratio/low_mean": 0.0011600875404838007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002034217286563944, "epoch": 2.1866433362496354, "grad_norm": 0.24384094774723053, "learning_rate": 1e-06, "loss": -0.0443, "step": 936 }, { "clip_ratio/high_max": 0.001768199923390057, "clip_ratio/high_mean": 0.000634681118754088, "clip_ratio/low_mean": 0.0006225188540156523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012571999686770141, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3038.0, "completions/mean_length": 1036.712158203125, "completions/mean_terminated_length": 586.24072265625, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 2.188976377952756, "grad_norm": 0.32948145270347595, "learning_rate": 1e-06, "loss": -0.0378, "num_tokens": 138426646.0, "reward": 0.5803571939468384, "reward_std": 0.15818998217582703, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 937 }, { "clip_ratio/high_max": 0.002090778416459216, "clip_ratio/high_mean": 0.0008678984213474905, "clip_ratio/low_mean": 0.0008075540590652963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001675452458584914, "epoch": 2.1913094196558762, "grad_norm": 0.43128690123558044, "learning_rate": 1e-06, "loss": -0.038, "step": 938 }, { "clip_ratio/high_max": 0.0024906518374336883, "clip_ratio/high_mean": 0.0009043059508258011, "clip_ratio/low_mean": 0.0010591671789370594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00196347308519762, "epoch": 2.193642461358997, "grad_norm": 0.21879757940769196, "learning_rate": 1e-06, "loss": -0.0383, "step": 939 }, { "clip_ratio/high_max": 0.0023015859405859374, "clip_ratio/high_mean": 0.0008173199948942056, "clip_ratio/low_mean": 0.001166840183941531, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019841601388179697, "epoch": 2.195975503062117, "grad_norm": 0.22823746502399445, "learning_rate": 1e-06, "loss": -0.0382, "step": 940 }, { "clip_ratio/high_max": 0.0020483883927227, "clip_ratio/high_mean": 0.0007574281025881646, "clip_ratio/low_mean": 0.00047466508294746745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012320932182774413, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3623.0, "completions/mean_length": 1117.8504638671875, "completions/mean_terminated_length": 630.516845703125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.1983085447652377, "grad_norm": 0.30982983112335205, "learning_rate": 1e-06, "loss": -0.0259, "num_tokens": 139001576.0, "reward": 0.574776828289032, "reward_std": 0.15090152621269226, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 941 }, { "clip_ratio/high_max": 0.0023883046815171838, "clip_ratio/high_mean": 0.0010080747797474032, "clip_ratio/low_mean": 0.0007136938784242375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017217686836374924, "epoch": 2.200641586468358, "grad_norm": 0.29436051845550537, "learning_rate": 1e-06, "loss": -0.0261, "step": 942 }, { "clip_ratio/high_max": 0.0025798613351071253, "clip_ratio/high_mean": 0.000991768474705168, "clip_ratio/low_mean": 0.0008289616853289772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001820730212784838, "epoch": 2.2029746281714786, "grad_norm": 0.22793766856193542, "learning_rate": 1e-06, "loss": -0.0262, "step": 943 }, { "clip_ratio/high_max": 0.0023700841338722967, "clip_ratio/high_mean": 0.00098240349052503, "clip_ratio/low_mean": 0.0010814499692060053, "clip_ratio/low_min": 1.6587049685767852e-05, "clip_ratio/region_mean": 0.0020638534988393076, "epoch": 2.205307669874599, "grad_norm": 0.26359623670578003, "learning_rate": 1e-06, "loss": -0.0263, "step": 944 }, { "clip_ratio/high_max": 0.001843147969339043, "clip_ratio/high_mean": 0.0006260225245569018, "clip_ratio/low_mean": 0.000724763362086378, "clip_ratio/low_min": 4.9626807594904676e-05, "clip_ratio/region_mean": 0.0013507858820958063, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2832.0, "completions/mean_length": 1200.65185546875, "completions/mean_terminated_length": 664.4761962890625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 2.2076407115777195, "grad_norm": 0.33370280265808105, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 139587328.0, "reward": 0.5334821939468384, "reward_std": 0.15729349851608276, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 945 }, { "clip_ratio/high_max": 0.0019799328547378536, "clip_ratio/high_mean": 0.0007350524265348213, "clip_ratio/low_mean": 0.0010777700445032679, "clip_ratio/low_min": 4.0787468606140465e-05, "clip_ratio/region_mean": 0.0018128224764950573, "epoch": 2.20997375328084, "grad_norm": 0.22046437859535217, "learning_rate": 1e-06, "loss": -0.0474, "step": 946 }, { "clip_ratio/high_max": 0.002227421813586261, "clip_ratio/high_mean": 0.0007772156586725032, "clip_ratio/low_mean": 0.0011329547014611308, "clip_ratio/low_min": 6.978042074479163e-05, "clip_ratio/region_mean": 0.0019101703655906022, "epoch": 2.2123067949839603, "grad_norm": 0.25371313095092773, "learning_rate": 1e-06, "loss": -0.0475, "step": 947 }, { "clip_ratio/high_max": 0.002114128517860081, "clip_ratio/high_mean": 0.0007653627180843614, "clip_ratio/low_mean": 0.001256132894923212, "clip_ratio/low_min": 5.815035183331929e-05, "clip_ratio/region_mean": 0.002021495543885976, "epoch": 2.214639836687081, "grad_norm": 0.2694165110588074, "learning_rate": 1e-06, "loss": -0.0476, "step": 948 }, { "clip_ratio/high_max": 0.0016557084200030658, "clip_ratio/high_mean": 0.0005161424414836802, "clip_ratio/low_mean": 0.000461753966192191, "clip_ratio/low_min": 4.251655445841607e-05, "clip_ratio/region_mean": 0.0009778964176803129, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3777.0, "completions/mean_length": 1111.8170166015625, "completions/mean_terminated_length": 618.98046875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.216972878390201, "grad_norm": 0.276732474565506, "learning_rate": 1e-06, "loss": -0.041, "num_tokens": 140145060.0, "reward": 0.5212053656578064, "reward_std": 0.13609540462493896, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 949 }, { "clip_ratio/high_max": 0.002127289764757734, "clip_ratio/high_mean": 0.0007239664619191899, "clip_ratio/low_mean": 0.000746685138437897, "clip_ratio/low_min": 4.861864908889402e-05, "clip_ratio/region_mean": 0.001470651630370412, "epoch": 2.219305920093322, "grad_norm": 0.23614975810050964, "learning_rate": 1e-06, "loss": -0.0412, "step": 950 }, { "clip_ratio/high_max": 0.00227067967352923, "clip_ratio/high_mean": 0.0007507983964387677, "clip_ratio/low_mean": 0.0008739369732211344, "clip_ratio/low_min": 4.017141691292636e-05, "clip_ratio/region_mean": 0.0016247353378275875, "epoch": 2.221638961796442, "grad_norm": 0.18173855543136597, "learning_rate": 1e-06, "loss": -0.0414, "step": 951 }, { "clip_ratio/high_max": 0.002109660468704533, "clip_ratio/high_mean": 0.000666342064505443, "clip_ratio/low_mean": 0.001001883705612272, "clip_ratio/low_min": 6.294832837738795e-05, "clip_ratio/region_mean": 0.0016682257810316514, "epoch": 2.2239720034995627, "grad_norm": 0.2091158926486969, "learning_rate": 1e-06, "loss": -0.0414, "step": 952 }, { "clip_ratio/high_max": 0.0026642042430466972, "clip_ratio/high_mean": 0.0009831271636357997, "clip_ratio/low_mean": 0.0006934364646440372, "clip_ratio/low_min": 2.9322072805371135e-05, "clip_ratio/region_mean": 0.0016765636173659004, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 1180.571533203125, "completions/mean_terminated_length": 608.384521484375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.226305045202683, "grad_norm": 0.3571631610393524, "learning_rate": 1e-06, "loss": -0.0407, "num_tokens": 140691932.0, "reward": 0.535714328289032, "reward_std": 0.1716071218252182, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 953 }, { "clip_ratio/high_max": 0.00303604166401783, "clip_ratio/high_mean": 0.0010800326363096246, "clip_ratio/low_mean": 0.0009275444572267588, "clip_ratio/low_min": 6.189139821799472e-05, "clip_ratio/region_mean": 0.0020075770444236696, "epoch": 2.2286380869058036, "grad_norm": 0.2970696985721588, "learning_rate": 1e-06, "loss": -0.0409, "step": 954 }, { "clip_ratio/high_max": 0.0035131540935253724, "clip_ratio/high_mean": 0.0012164544277766254, "clip_ratio/low_mean": 0.0012272803178348113, "clip_ratio/low_min": 0.00010359116276958957, "clip_ratio/region_mean": 0.0024437347674393095, "epoch": 2.2309711286089238, "grad_norm": 0.2447551190853119, "learning_rate": 1e-06, "loss": -0.0411, "step": 955 }, { "clip_ratio/high_max": 0.0028052897614543326, "clip_ratio/high_mean": 0.0010593809638521634, "clip_ratio/low_mean": 0.0014988202528911643, "clip_ratio/low_min": 0.0001340980379609391, "clip_ratio/region_mean": 0.0025582012021914124, "epoch": 2.2333041703120444, "grad_norm": 0.2844543755054474, "learning_rate": 1e-06, "loss": -0.0412, "step": 956 }, { "clip_ratio/high_max": 0.0024161854744306765, "clip_ratio/high_mean": 0.0010049222473753616, "clip_ratio/low_mean": 0.0004824429756808968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014873652544338256, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3811.0, "completions/mean_length": 1056.2723388671875, "completions/mean_terminated_length": 604.2102661132812, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.2356372120151646, "grad_norm": 0.3601495325565338, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 141238360.0, "reward": 0.6484375, "reward_std": 0.16450665891170502, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 957 }, { "clip_ratio/high_max": 0.0032573289645370096, "clip_ratio/high_mean": 0.0013352053574635647, "clip_ratio/low_mean": 0.0007881219216869795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021233272418612614, "epoch": 2.2379702537182853, "grad_norm": 0.2749025523662567, "learning_rate": 1e-06, "loss": -0.0442, "step": 958 }, { "clip_ratio/high_max": 0.003161405933497008, "clip_ratio/high_mean": 0.0013238063256721944, "clip_ratio/low_mean": 0.0008990651913336478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002222871538833715, "epoch": 2.2403032954214055, "grad_norm": 0.24613353610038757, "learning_rate": 1e-06, "loss": -0.0444, "step": 959 }, { "clip_ratio/high_max": 0.002812174214341212, "clip_ratio/high_mean": 0.0011950203588639852, "clip_ratio/low_mean": 0.0010793403234856669, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002274360682349652, "epoch": 2.242636337124526, "grad_norm": 0.35043787956237793, "learning_rate": 1e-06, "loss": -0.0445, "step": 960 }, { "clip_ratio/high_max": 0.002419755546725355, "clip_ratio/high_mean": 0.0008969339869508985, "clip_ratio/low_mean": 0.00043708002590392425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013340140030777548, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2158.0, "completions/mean_length": 1127.540283203125, "completions/mean_terminated_length": 587.1082153320312, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.2449693788276464, "grad_norm": 0.3806179463863373, "learning_rate": 1e-06, "loss": -0.0803, "num_tokens": 141771868.0, "reward": 0.5870535969734192, "reward_std": 0.16172580420970917, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 961 }, { "clip_ratio/high_max": 0.0030022057762835175, "clip_ratio/high_mean": 0.001246008978341706, "clip_ratio/low_mean": 0.000651918781841232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018979277519974858, "epoch": 2.247302420530767, "grad_norm": 0.27406615018844604, "learning_rate": 1e-06, "loss": -0.0807, "step": 962 }, { "clip_ratio/high_max": 0.0028254452408873476, "clip_ratio/high_mean": 0.0011592298033065163, "clip_ratio/low_mean": 0.0008223621834986261, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019815919440588914, "epoch": 2.249635462233887, "grad_norm": 0.26929888129234314, "learning_rate": 1e-06, "loss": -0.0808, "step": 963 }, { "clip_ratio/high_max": 0.0026136957967537455, "clip_ratio/high_mean": 0.0010887508469750173, "clip_ratio/low_mean": 0.0009752080068210489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020639588838093914, "epoch": 2.251968503937008, "grad_norm": 0.27124956250190735, "learning_rate": 1e-06, "loss": -0.0809, "step": 964 }, { "clip_ratio/high_max": 0.0022181659478519578, "clip_ratio/high_mean": 0.000915030407213635, "clip_ratio/low_mean": 0.0006308629726845538, "clip_ratio/low_min": 2.5375558834639378e-05, "clip_ratio/region_mean": 0.0015458933776244521, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 1011.0848388671875, "completions/mean_terminated_length": 623.5326538085938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.2543015456401285, "grad_norm": 0.3349124789237976, "learning_rate": 1e-06, "loss": -0.0567, "num_tokens": 142348168.0, "reward": 0.6261160969734192, "reward_std": 0.1795709878206253, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 965 }, { "clip_ratio/high_max": 0.002503495335986372, "clip_ratio/high_mean": 0.0010731359543569852, "clip_ratio/low_mean": 0.000809296878742316, "clip_ratio/low_min": 9.833228432398755e-06, "clip_ratio/region_mean": 0.0018824328071787022, "epoch": 2.2566345873432487, "grad_norm": 0.2904164493083954, "learning_rate": 1e-06, "loss": -0.0569, "step": 966 }, { "clip_ratio/high_max": 0.0024661056631885003, "clip_ratio/high_mean": 0.0010312086633348372, "clip_ratio/low_mean": 0.0010948240997095127, "clip_ratio/low_min": 2.2986392650636844e-05, "clip_ratio/region_mean": 0.0021260327084746677, "epoch": 2.2589676290463694, "grad_norm": 0.2515934109687805, "learning_rate": 1e-06, "loss": -0.0571, "step": 967 }, { "clip_ratio/high_max": 0.0027019068111258093, "clip_ratio/high_mean": 0.00110522161321569, "clip_ratio/low_mean": 0.0012917765252495883, "clip_ratio/low_min": 6.47084725642344e-05, "clip_ratio/region_mean": 0.0023969980975380167, "epoch": 2.2613006707494896, "grad_norm": 0.2741507887840271, "learning_rate": 1e-06, "loss": -0.0572, "step": 968 }, { "clip_ratio/high_max": 0.002199630605900893, "clip_ratio/high_mean": 0.0008182023684639717, "clip_ratio/low_mean": 0.0006519424650832661, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014701447835250292, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3819.0, "completions/mean_length": 1000.7511596679688, "completions/mean_terminated_length": 624.9824829101562, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 2.2636337124526102, "grad_norm": 0.3289185166358948, "learning_rate": 1e-06, "loss": -0.0485, "num_tokens": 142933185.0, "reward": 0.598214328289032, "reward_std": 0.16198793053627014, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 969 }, { "clip_ratio/high_max": 0.0025676776858745143, "clip_ratio/high_mean": 0.0009590076369931921, "clip_ratio/low_mean": 0.0008741687543079024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018331763858441263, "epoch": 2.2659667541557305, "grad_norm": 0.2584741711616516, "learning_rate": 1e-06, "loss": -0.0487, "step": 970 }, { "clip_ratio/high_max": 0.002799642810714431, "clip_ratio/high_mean": 0.0010030778466898482, "clip_ratio/low_mean": 0.0010420067264931276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020450845404411666, "epoch": 2.268299795858851, "grad_norm": 0.23060286045074463, "learning_rate": 1e-06, "loss": -0.0489, "step": 971 }, { "clip_ratio/high_max": 0.002635231299791485, "clip_ratio/high_mean": 0.0009523928201815579, "clip_ratio/low_mean": 0.001234156496138894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021865493181394413, "epoch": 2.2706328375619713, "grad_norm": 0.23002904653549194, "learning_rate": 1e-06, "loss": -0.049, "step": 972 }, { "clip_ratio/high_max": 0.0028391991509124637, "clip_ratio/high_mean": 0.001106439893192146, "clip_ratio/low_mean": 0.0006090573533583665, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017154972338175867, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2396.0, "completions/mean_length": 1066.6551513671875, "completions/mean_terminated_length": 557.1564331054688, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.272965879265092, "grad_norm": 0.3089137673377991, "learning_rate": 1e-06, "loss": -0.0821, "num_tokens": 143437588.0, "reward": 0.645089328289032, "reward_std": 0.15808121860027313, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 973 }, { "clip_ratio/high_max": 0.0032532387995161116, "clip_ratio/high_mean": 0.0012893593338958453, "clip_ratio/low_mean": 0.0007900426817286643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020794020492758136, "epoch": 2.275298920968212, "grad_norm": 0.2660689949989319, "learning_rate": 1e-06, "loss": -0.0822, "step": 974 }, { "clip_ratio/high_max": 0.0037110813573235646, "clip_ratio/high_mean": 0.0013476625608745962, "clip_ratio/low_mean": 0.0009197242743539391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00226738685159944, "epoch": 2.277631962671333, "grad_norm": 0.2527943253517151, "learning_rate": 1e-06, "loss": -0.0825, "step": 975 }, { "clip_ratio/high_max": 0.0033753456664271653, "clip_ratio/high_mean": 0.001257763993635308, "clip_ratio/low_mean": 0.0011084291199949803, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002366193075431511, "epoch": 2.279965004374453, "grad_norm": 0.2624468505382538, "learning_rate": 1e-06, "loss": -0.0825, "step": 976 }, { "clip_ratio/high_max": 0.0023874520557001233, "clip_ratio/high_mean": 0.0010449714463902637, "clip_ratio/low_mean": 0.0005359301885619061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015809016913408414, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3252.0, "completions/mean_length": 1222.703125, "completions/mean_terminated_length": 644.9624633789062, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 2.2822980460775737, "grad_norm": 0.36057326197624207, "learning_rate": 1e-06, "loss": -0.0536, "num_tokens": 144001986.0, "reward": 0.578125, "reward_std": 0.17299975454807281, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 977 }, { "clip_ratio/high_max": 0.0027978644429822452, "clip_ratio/high_mean": 0.0011361790857336018, "clip_ratio/low_mean": 0.0007619837706442922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018981628454639576, "epoch": 2.284631087780694, "grad_norm": 0.2731843888759613, "learning_rate": 1e-06, "loss": -0.0537, "step": 978 }, { "clip_ratio/high_max": 0.002700283053854946, "clip_ratio/high_mean": 0.0011772612706408836, "clip_ratio/low_mean": 0.000963448102993425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002140709380910266, "epoch": 2.2869641294838146, "grad_norm": 0.2100088894367218, "learning_rate": 1e-06, "loss": -0.0539, "step": 979 }, { "clip_ratio/high_max": 0.00309593380370643, "clip_ratio/high_mean": 0.0012225247410242446, "clip_ratio/low_mean": 0.001080663063476095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002303187829966191, "epoch": 2.289297171186935, "grad_norm": 0.21596242487430573, "learning_rate": 1e-06, "loss": -0.0539, "step": 980 }, { "clip_ratio/high_max": 0.002694579445233103, "clip_ratio/high_mean": 0.0012348649215709884, "clip_ratio/low_mean": 0.0006861911151645472, "clip_ratio/low_min": 4.292476296541281e-05, "clip_ratio/region_mean": 0.0019210560130886734, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 1203.9710693359375, "completions/mean_terminated_length": 654.7543334960938, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.2916302128900554, "grad_norm": 0.28912052512168884, "learning_rate": 1e-06, "loss": -0.0814, "num_tokens": 144588008.0, "reward": 0.5167410969734192, "reward_std": 0.20376215875148773, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 981 }, { "clip_ratio/high_max": 0.002998442665557377, "clip_ratio/high_mean": 0.0013558516875491478, "clip_ratio/low_mean": 0.0009762507397681475, "clip_ratio/low_min": 6.391544957295991e-05, "clip_ratio/region_mean": 0.0023321025000768714, "epoch": 2.2939632545931756, "grad_norm": 0.2508056163787842, "learning_rate": 1e-06, "loss": -0.0816, "step": 982 }, { "clip_ratio/high_max": 0.0032239833162748255, "clip_ratio/high_mean": 0.001394302129483549, "clip_ratio/low_mean": 0.0011156422642670805, "clip_ratio/low_min": 7.90116610005498e-05, "clip_ratio/region_mean": 0.002509944373741746, "epoch": 2.2962962962962963, "grad_norm": 0.2622165083885193, "learning_rate": 1e-06, "loss": -0.0818, "step": 983 }, { "clip_ratio/high_max": 0.0029617280670208856, "clip_ratio/high_mean": 0.0013805006765323924, "clip_ratio/low_mean": 0.0013676582530024461, "clip_ratio/low_min": 9.943220175046008e-05, "clip_ratio/region_mean": 0.0027481589568196796, "epoch": 2.298629337999417, "grad_norm": 0.2608332335948944, "learning_rate": 1e-06, "loss": -0.0819, "step": 984 }, { "clip_ratio/high_max": 0.002652029063028749, "clip_ratio/high_mean": 0.0007925968584459042, "clip_ratio/low_mean": 0.0007248047786561074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015174016079981811, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2870.0, "completions/mean_length": 1135.5882568359375, "completions/mean_terminated_length": 651.1571044921875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.300962379702537, "grad_norm": 0.3000766336917877, "learning_rate": 1e-06, "loss": -0.0537, "num_tokens": 145185087.0, "reward": 0.559151828289032, "reward_std": 0.1624336987733841, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 985 }, { "clip_ratio/high_max": 0.0030103262324701063, "clip_ratio/high_mean": 0.0010665687896107556, "clip_ratio/low_mean": 0.0009453985421714606, "clip_ratio/low_min": 4.060199353261851e-05, "clip_ratio/region_mean": 0.002011967313592322, "epoch": 2.303295421405658, "grad_norm": 0.2218482494354248, "learning_rate": 1e-06, "loss": -0.054, "step": 986 }, { "clip_ratio/high_max": 0.002894998098781798, "clip_ratio/high_mean": 0.0009617551313567674, "clip_ratio/low_mean": 0.0010639281244948506, "clip_ratio/low_min": 1.2250097825017292e-05, "clip_ratio/region_mean": 0.002025683337706141, "epoch": 2.305628463108778, "grad_norm": 0.22861462831497192, "learning_rate": 1e-06, "loss": -0.0541, "step": 987 }, { "clip_ratio/high_max": 0.0030679903575219214, "clip_ratio/high_mean": 0.0010229348026769003, "clip_ratio/low_mean": 0.0012137892845203169, "clip_ratio/low_min": 1.2250097825017292e-05, "clip_ratio/region_mean": 0.002236724059912376, "epoch": 2.3079615048118987, "grad_norm": 0.2555590569972992, "learning_rate": 1e-06, "loss": -0.0542, "step": 988 }, { "clip_ratio/high_max": 0.0031955152153386734, "clip_ratio/high_mean": 0.0011687080059346044, "clip_ratio/low_mean": 0.0006758173572052328, "clip_ratio/low_min": 2.572965604485944e-05, "clip_ratio/region_mean": 0.0018445253372192383, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2483.0, "completions/mean_length": 1158.669677734375, "completions/mean_terminated_length": 572.7764282226562, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 2.310294546515019, "grad_norm": 0.3772180378437042, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 145703951.0, "reward": 0.5558035969734192, "reward_std": 0.1875448375940323, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 989 }, { "clip_ratio/high_max": 0.0033459850965300575, "clip_ratio/high_mean": 0.001259680666407803, "clip_ratio/low_mean": 0.0010045460803667083, "clip_ratio/low_min": 3.2691279557184316e-05, "clip_ratio/region_mean": 0.0022642267504124902, "epoch": 2.3126275882181395, "grad_norm": 0.29148250818252563, "learning_rate": 1e-06, "loss": -0.0664, "step": 990 }, { "clip_ratio/high_max": 0.003272478556027636, "clip_ratio/high_mean": 0.0013310912727320101, "clip_ratio/low_mean": 0.001179783198494988, "clip_ratio/low_min": 2.8312570066191256e-05, "clip_ratio/region_mean": 0.0025108745030593127, "epoch": 2.3149606299212597, "grad_norm": 0.3249438405036926, "learning_rate": 1e-06, "loss": -0.0665, "step": 991 }, { "clip_ratio/high_max": 0.0033494883828097954, "clip_ratio/high_mean": 0.0013296841680130456, "clip_ratio/low_mean": 0.0014279158840508899, "clip_ratio/low_min": 8.391755181946792e-05, "clip_ratio/region_mean": 0.0027576000647968613, "epoch": 2.3172936716243804, "grad_norm": 0.2720489203929901, "learning_rate": 1e-06, "loss": -0.0667, "step": 992 }, { "clip_ratio/high_max": 0.0027341580935171805, "clip_ratio/high_mean": 0.0010409573660581373, "clip_ratio/low_mean": 0.000581146584408998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001622103933186736, "completions/clipped_ratio": 0.1529017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3695.0, "completions/mean_length": 1190.938720703125, "completions/mean_terminated_length": 666.5731201171875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.3196267133275006, "grad_norm": 0.39617666602134705, "learning_rate": 1e-06, "loss": -0.0477, "num_tokens": 146300536.0, "reward": 0.5390625, "reward_std": 0.16044628620147705, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 993 }, { "clip_ratio/high_max": 0.0033796231291489676, "clip_ratio/high_mean": 0.001176894274976803, "clip_ratio/low_mean": 0.0008453242789983051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020222185557940975, "epoch": 2.3219597550306212, "grad_norm": 0.31302952766418457, "learning_rate": 1e-06, "loss": -0.0478, "step": 994 }, { "clip_ratio/high_max": 0.0033353502658428624, "clip_ratio/high_mean": 0.0012012331644655205, "clip_ratio/low_mean": 0.0009508791226835456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021521122835110873, "epoch": 2.3242927967337415, "grad_norm": 0.2497008889913559, "learning_rate": 1e-06, "loss": -0.0481, "step": 995 }, { "clip_ratio/high_max": 0.0029132952477084473, "clip_ratio/high_mean": 0.0010657403581717517, "clip_ratio/low_mean": 0.0011637679217528785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002229508289019577, "epoch": 2.326625838436862, "grad_norm": 0.224105566740036, "learning_rate": 1e-06, "loss": -0.0482, "step": 996 }, { "clip_ratio/high_max": 0.0020946715885656886, "clip_ratio/high_mean": 0.0008001404403330525, "clip_ratio/low_mean": 0.00042710497018561, "clip_ratio/low_min": 1.6129031791933812e-05, "clip_ratio/region_mean": 0.0012272453859623056, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3185.0, "completions/mean_length": 1075.9140625, "completions/mean_terminated_length": 644.4732055664062, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 2.3289588801399823, "grad_norm": 0.2584588825702667, "learning_rate": 1e-06, "loss": -0.0593, "num_tokens": 146888867.0, "reward": 0.6071428656578064, "reward_std": 0.15706203877925873, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 997 }, { "clip_ratio/high_max": 0.002415469105471857, "clip_ratio/high_mean": 0.0009024233272612037, "clip_ratio/low_mean": 0.0006431267738662427, "clip_ratio/low_min": 5.172770397621207e-05, "clip_ratio/region_mean": 0.0015455501197720878, "epoch": 2.331291921843103, "grad_norm": 0.29324856400489807, "learning_rate": 1e-06, "loss": -0.0595, "step": 998 }, { "clip_ratio/high_max": 0.0024191764314309694, "clip_ratio/high_mean": 0.0009480474327574484, "clip_ratio/low_mean": 0.0008097914069367107, "clip_ratio/low_min": 1.6129031791933812e-05, "clip_ratio/region_mean": 0.0017578388178662863, "epoch": 2.3336249635462236, "grad_norm": 0.20104141533374786, "learning_rate": 1e-06, "loss": -0.0596, "step": 999 }, { "clip_ratio/high_max": 0.0022805830703873653, "clip_ratio/high_mean": 0.0008394501201109961, "clip_ratio/low_mean": 0.0008450898640148807, "clip_ratio/low_min": 6.451612716773525e-05, "clip_ratio/region_mean": 0.0016845399732119404, "epoch": 2.335958005249344, "grad_norm": 0.20965102314949036, "learning_rate": 1e-06, "loss": -0.0597, "step": 1000 }, { "clip_ratio/high_max": 0.002165381643862929, "clip_ratio/high_mean": 0.000781822514909436, "clip_ratio/low_mean": 0.0005456448689074023, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013274673838168383, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3491.0, "completions/mean_length": 1153.9107666015625, "completions/mean_terminated_length": 604.4609375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 2.338291046952464, "grad_norm": 0.3027136027812958, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 147437075.0, "reward": 0.5334821939468384, "reward_std": 0.13500885665416718, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1001 }, { "clip_ratio/high_max": 0.0026408979974803515, "clip_ratio/high_mean": 0.0009627417293813778, "clip_ratio/low_mean": 0.0007521888182964176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017149305313068908, "epoch": 2.3406240886555847, "grad_norm": 0.25148019194602966, "learning_rate": 1e-06, "loss": -0.0543, "step": 1002 }, { "clip_ratio/high_max": 0.0027020912239095196, "clip_ratio/high_mean": 0.0009959432627510978, "clip_ratio/low_mean": 0.0009145451731455978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019104883976979181, "epoch": 2.3429571303587053, "grad_norm": 0.20745614171028137, "learning_rate": 1e-06, "loss": -0.0545, "step": 1003 }, { "clip_ratio/high_max": 0.002835997562215198, "clip_ratio/high_mean": 0.000995350506855175, "clip_ratio/low_mean": 0.0011339418560964987, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002129292370227631, "epoch": 2.3452901720618256, "grad_norm": 0.23285433650016785, "learning_rate": 1e-06, "loss": -0.0546, "step": 1004 }, { "clip_ratio/high_max": 0.0025864409108180553, "clip_ratio/high_mean": 0.0009867948083410738, "clip_ratio/low_mean": 0.0006698022280033911, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016565970581723377, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2938.0, "completions/mean_length": 1144.094970703125, "completions/mean_terminated_length": 597.4457397460938, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.347623213764946, "grad_norm": 0.32017406821250916, "learning_rate": 1e-06, "loss": -0.0705, "num_tokens": 147974072.0, "reward": 0.5691964626312256, "reward_std": 0.17277081310749054, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1005 }, { "clip_ratio/high_max": 0.003541181984473951, "clip_ratio/high_mean": 0.0012657882925850572, "clip_ratio/low_mean": 0.0010236859016004018, "clip_ratio/low_min": 2.3710166715318337e-05, "clip_ratio/region_mean": 0.0022894742214703, "epoch": 2.3499562554680664, "grad_norm": 0.2364875078201294, "learning_rate": 1e-06, "loss": -0.0709, "step": 1006 }, { "clip_ratio/high_max": 0.0031734051372040994, "clip_ratio/high_mean": 0.0012652313234866597, "clip_ratio/low_mean": 0.0011457300988695351, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002410961387795396, "epoch": 2.352289297171187, "grad_norm": 0.245115265250206, "learning_rate": 1e-06, "loss": -0.0709, "step": 1007 }, { "clip_ratio/high_max": 0.0032782024645712227, "clip_ratio/high_mean": 0.0012094823105144314, "clip_ratio/low_mean": 0.0013976999325677752, "clip_ratio/low_min": 2.3710166715318337e-05, "clip_ratio/region_mean": 0.002607182163046673, "epoch": 2.3546223388743073, "grad_norm": 0.21835707128047943, "learning_rate": 1e-06, "loss": -0.0711, "step": 1008 }, { "clip_ratio/high_max": 0.0016837205039337277, "clip_ratio/high_mean": 0.0006111365655669942, "clip_ratio/low_mean": 0.00039568928423250327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010068258379760664, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 1125.196533203125, "completions/mean_terminated_length": 621.0130615234375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.356955380577428, "grad_norm": 0.2512587904930115, "learning_rate": 1e-06, "loss": -0.0546, "num_tokens": 148534904.0, "reward": 0.629464328289032, "reward_std": 0.13534656167030334, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 1009 }, { "clip_ratio/high_max": 0.002148231546016177, "clip_ratio/high_mean": 0.0008400733331654919, "clip_ratio/low_mean": 0.0005824126833431365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014224859842215665, "epoch": 2.359288422280548, "grad_norm": 0.18119673430919647, "learning_rate": 1e-06, "loss": -0.0548, "step": 1010 }, { "clip_ratio/high_max": 0.0024765461348579265, "clip_ratio/high_mean": 0.0009033664991875412, "clip_ratio/low_mean": 0.0006529972429234476, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015563637716695666, "epoch": 2.361621463983669, "grad_norm": 0.18178009986877441, "learning_rate": 1e-06, "loss": -0.0549, "step": 1011 }, { "clip_ratio/high_max": 0.0024374165113840718, "clip_ratio/high_mean": 0.000785118581916322, "clip_ratio/low_mean": 0.0007713841982877057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015565027970296796, "epoch": 2.363954505686789, "grad_norm": 0.16140949726104736, "learning_rate": 1e-06, "loss": -0.055, "step": 1012 }, { "clip_ratio/high_max": 0.001982963345653843, "clip_ratio/high_mean": 0.0006908731138537405, "clip_ratio/low_mean": 0.0006206093303262605, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013114824396325275, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2940.0, "completions/mean_length": 1175.548095703125, "completions/mean_terminated_length": 679.909912109375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 2.3662875473899097, "grad_norm": 0.28620150685310364, "learning_rate": 1e-06, "loss": -0.0503, "num_tokens": 149138171.0, "reward": 0.5613839626312256, "reward_std": 0.13654935359954834, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1013 }, { "clip_ratio/high_max": 0.0024065280231297947, "clip_ratio/high_mean": 0.0007314348376894486, "clip_ratio/low_mean": 0.000729183364455821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014606182121497113, "epoch": 2.36862058909303, "grad_norm": 0.20217272639274597, "learning_rate": 1e-06, "loss": -0.0505, "step": 1014 }, { "clip_ratio/high_max": 0.002407199404842686, "clip_ratio/high_mean": 0.0007553441428171936, "clip_ratio/low_mean": 0.0008421047932642978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015974489360814914, "epoch": 2.3709536307961505, "grad_norm": 0.22331973910331726, "learning_rate": 1e-06, "loss": -0.0505, "step": 1015 }, { "clip_ratio/high_max": 0.0020915623936161865, "clip_ratio/high_mean": 0.0007299595235963352, "clip_ratio/low_mean": 0.0010612890218908433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017912485091073904, "epoch": 2.3732866724992707, "grad_norm": 0.20704081654548645, "learning_rate": 1e-06, "loss": -0.0506, "step": 1016 }, { "clip_ratio/high_max": 0.0034728839236777276, "clip_ratio/high_mean": 0.0011303817245789105, "clip_ratio/low_mean": 0.0005705017920263344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017008835238812026, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3766.0, "completions/mean_length": 1060.7645263671875, "completions/mean_terminated_length": 631.578369140625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.3756197142023914, "grad_norm": 0.36774662137031555, "learning_rate": 1e-06, "loss": -0.057, "num_tokens": 149718992.0, "reward": 0.6171875, "reward_std": 0.1595052033662796, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1017 }, { "clip_ratio/high_max": 0.0044531360326800495, "clip_ratio/high_mean": 0.0013801455934299156, "clip_ratio/low_mean": 0.000763638318858284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002143783975043334, "epoch": 2.377952755905512, "grad_norm": 0.28642094135284424, "learning_rate": 1e-06, "loss": -0.0574, "step": 1018 }, { "clip_ratio/high_max": 0.005310886037477758, "clip_ratio/high_mean": 0.0015749102640256751, "clip_ratio/low_mean": 0.0009763622192622279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025512724605505355, "epoch": 2.3802857976086322, "grad_norm": 0.30652979016304016, "learning_rate": 1e-06, "loss": -0.0576, "step": 1019 }, { "clip_ratio/high_max": 0.005440009721496608, "clip_ratio/high_mean": 0.001479281030697166, "clip_ratio/low_mean": 0.0011621802077570464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026414612657390535, "epoch": 2.382618839311753, "grad_norm": 0.2900792062282562, "learning_rate": 1e-06, "loss": -0.0576, "step": 1020 }, { "clip_ratio/high_max": 0.0016350403457181528, "clip_ratio/high_mean": 0.0006149514538265066, "clip_ratio/low_mean": 0.00037639642846443166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000991347873423365, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3706.0, "completions/mean_length": 1115.727783203125, "completions/mean_terminated_length": 698.6412353515625, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 2.384951881014873, "grad_norm": 0.22175031900405884, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 150361820.0, "reward": 0.5491071939468384, "reward_std": 0.13354600965976715, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1021 }, { "clip_ratio/high_max": 0.001988006792089436, "clip_ratio/high_mean": 0.000717979422006465, "clip_ratio/low_mean": 0.0005121810263517546, "clip_ratio/low_min": 1.3724198652198538e-05, "clip_ratio/region_mean": 0.0012301604547246825, "epoch": 2.3872849227179938, "grad_norm": 0.21414272487163544, "learning_rate": 1e-06, "loss": -0.0309, "step": 1022 }, { "clip_ratio/high_max": 0.0021304474903445225, "clip_ratio/high_mean": 0.0008804438002698589, "clip_ratio/low_mean": 0.000570450026771141, "clip_ratio/low_min": 1.941295158758294e-05, "clip_ratio/region_mean": 0.001450893840228673, "epoch": 2.389617964421114, "grad_norm": 0.22635546326637268, "learning_rate": 1e-06, "loss": -0.0311, "step": 1023 }, { "clip_ratio/high_max": 0.0018546131504990626, "clip_ratio/high_mean": 0.000691392498993082, "clip_ratio/low_mean": 0.000757499012252083, "clip_ratio/low_min": 2.911942829086911e-05, "clip_ratio/region_mean": 0.0014488915076071862, "epoch": 2.3919510061242346, "grad_norm": 0.17617785930633545, "learning_rate": 1e-06, "loss": -0.0311, "step": 1024 }, { "clip_ratio/high_max": 0.002508030127501115, "clip_ratio/high_mean": 0.0010209910560661228, "clip_ratio/low_mean": 0.0005794368807983119, "clip_ratio/low_min": 1.5337423974415287e-05, "clip_ratio/region_mean": 0.0016004279714252334, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 1116.724365234375, "completions/mean_terminated_length": 656.0115966796875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.394284047827355, "grad_norm": 0.2999490201473236, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 150959397.0, "reward": 0.578125, "reward_std": 0.17739249765872955, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 1025 }, { "clip_ratio/high_max": 0.002997482515638694, "clip_ratio/high_mean": 0.0012299517911742441, "clip_ratio/low_mean": 0.0007308456479222514, "clip_ratio/low_min": 1.5337423974415287e-05, "clip_ratio/region_mean": 0.001960797468200326, "epoch": 2.3966170895304755, "grad_norm": 0.2786708176136017, "learning_rate": 1e-06, "loss": -0.0553, "step": 1026 }, { "clip_ratio/high_max": 0.003061028866795823, "clip_ratio/high_mean": 0.0012740095735352952, "clip_ratio/low_mean": 0.0009666674104664708, "clip_ratio/low_min": 1.5337423974415287e-05, "clip_ratio/region_mean": 0.002240676956716925, "epoch": 2.3989501312335957, "grad_norm": 0.30744946002960205, "learning_rate": 1e-06, "loss": -0.0554, "step": 1027 }, { "clip_ratio/high_max": 0.0029087264701956883, "clip_ratio/high_mean": 0.0011492716112115886, "clip_ratio/low_mean": 0.0011242707532801433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022735423772246577, "epoch": 2.4012831729367163, "grad_norm": 0.19937247037887573, "learning_rate": 1e-06, "loss": -0.0555, "step": 1028 }, { "clip_ratio/high_max": 0.0026239179860567674, "clip_ratio/high_mean": 0.0011437333923822735, "clip_ratio/low_mean": 0.0006050125793990446, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017487459990661591, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 1022.3281860351562, "completions/mean_terminated_length": 601.0634155273438, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.4036162146398365, "grad_norm": 0.3392671048641205, "learning_rate": 1e-06, "loss": -0.0527, "num_tokens": 151517939.0, "reward": 0.625, "reward_std": 0.1799846738576889, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1029 }, { "clip_ratio/high_max": 0.002875018493796233, "clip_ratio/high_mean": 0.0012252770939085167, "clip_ratio/low_mean": 0.0008593679367550067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002084644991555251, "epoch": 2.405949256342957, "grad_norm": 0.3104827404022217, "learning_rate": 1e-06, "loss": -0.053, "step": 1030 }, { "clip_ratio/high_max": 0.0031917464584694244, "clip_ratio/high_mean": 0.001287612994929077, "clip_ratio/low_mean": 0.0010495392143639037, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023371522111119702, "epoch": 2.4082822980460774, "grad_norm": 0.29952681064605713, "learning_rate": 1e-06, "loss": -0.0532, "step": 1031 }, { "clip_ratio/high_max": 0.0029809527841280214, "clip_ratio/high_mean": 0.0011874449846800417, "clip_ratio/low_mean": 0.0012573567855724832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024448017647955567, "epoch": 2.410615339749198, "grad_norm": 0.24343644082546234, "learning_rate": 1e-06, "loss": -0.0533, "step": 1032 }, { "clip_ratio/high_max": 0.002099279656249564, "clip_ratio/high_mean": 0.0008029598102439195, "clip_ratio/low_mean": 0.000499091003803187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013020508013141807, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3196.0, "completions/mean_length": 1026.536865234375, "completions/mean_terminated_length": 627.85498046875, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 2.4129483814523183, "grad_norm": 0.28373295068740845, "learning_rate": 1e-06, "loss": -0.0644, "num_tokens": 152107364.0, "reward": 0.6160714626312256, "reward_std": 0.16743192076683044, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 1033 }, { "clip_ratio/high_max": 0.0028957660470041446, "clip_ratio/high_mean": 0.0010994240874424577, "clip_ratio/low_mean": 0.000717490160241141, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018169142858823761, "epoch": 2.415281423155439, "grad_norm": 0.22463740408420563, "learning_rate": 1e-06, "loss": -0.0646, "step": 1034 }, { "clip_ratio/high_max": 0.0024197215352614876, "clip_ratio/high_mean": 0.0009790176372916903, "clip_ratio/low_mean": 0.0008822561085253255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00186127373308409, "epoch": 2.417614464858559, "grad_norm": 0.24838964641094208, "learning_rate": 1e-06, "loss": -0.0648, "step": 1035 }, { "clip_ratio/high_max": 0.002677409182069823, "clip_ratio/high_mean": 0.0010236194138997234, "clip_ratio/low_mean": 0.0010017153799708467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002025334812060464, "epoch": 2.41994750656168, "grad_norm": 0.21755583584308624, "learning_rate": 1e-06, "loss": -0.0648, "step": 1036 }, { "clip_ratio/high_max": 0.0024117432403727435, "clip_ratio/high_mean": 0.0010259137743560132, "clip_ratio/low_mean": 0.0008399565867875936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018658704066183418, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3989.0, "completions/mean_length": 1220.51904296875, "completions/mean_terminated_length": 637.703369140625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.4222805482648004, "grad_norm": 0.39747315645217896, "learning_rate": 1e-06, "loss": -0.0534, "num_tokens": 152674085.0, "reward": 0.5100446939468384, "reward_std": 0.2037724405527115, "rewards/verify_math_reward/mean": 0.5100446343421936, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1037 }, { "clip_ratio/high_max": 0.0029730666137766093, "clip_ratio/high_mean": 0.00125672075591865, "clip_ratio/low_mean": 0.0010882800452236552, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00234500078659039, "epoch": 2.4246135899679206, "grad_norm": 0.45811083912849426, "learning_rate": 1e-06, "loss": -0.0538, "step": 1038 }, { "clip_ratio/high_max": 0.0027848283207276836, "clip_ratio/high_mean": 0.0011850921364384703, "clip_ratio/low_mean": 0.0014201014419086277, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026051936220028438, "epoch": 2.4269466316710413, "grad_norm": 0.25683873891830444, "learning_rate": 1e-06, "loss": -0.0539, "step": 1039 }, { "clip_ratio/high_max": 0.0030117457718006335, "clip_ratio/high_mean": 0.001205594224302331, "clip_ratio/low_mean": 0.001628739686566405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028343339363345876, "epoch": 2.4292796733741615, "grad_norm": 0.2934616208076477, "learning_rate": 1e-06, "loss": -0.0541, "step": 1040 }, { "clip_ratio/high_max": 0.0023620007559657097, "clip_ratio/high_mean": 0.0009517699872958474, "clip_ratio/low_mean": 0.0006037206144355878, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015554906567558646, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2973.0, "completions/mean_length": 1104.765625, "completions/mean_terminated_length": 619.805419921875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.431612715077282, "grad_norm": 0.32303157448768616, "learning_rate": 1e-06, "loss": -0.0482, "num_tokens": 153247235.0, "reward": 0.5524553656578064, "reward_std": 0.17922443151474, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1041 }, { "clip_ratio/high_max": 0.0029223363890196197, "clip_ratio/high_mean": 0.0011394780394766713, "clip_ratio/low_mean": 0.0009088044116651872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020482824838836677, "epoch": 2.4339457567804024, "grad_norm": 0.22415462136268616, "learning_rate": 1e-06, "loss": -0.0485, "step": 1042 }, { "clip_ratio/high_max": 0.002557893552875612, "clip_ratio/high_mean": 0.0010014909148594597, "clip_ratio/low_mean": 0.0011282869018032216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002129777829395607, "epoch": 2.436278798483523, "grad_norm": 0.24331408739089966, "learning_rate": 1e-06, "loss": -0.0486, "step": 1043 }, { "clip_ratio/high_max": 0.0026559678371995687, "clip_ratio/high_mean": 0.0010543445932853501, "clip_ratio/low_mean": 0.0012289675414649537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002283312140207272, "epoch": 2.4386118401866432, "grad_norm": 0.24428428709506989, "learning_rate": 1e-06, "loss": -0.0487, "step": 1044 }, { "clip_ratio/high_max": 0.0021358276244427543, "clip_ratio/high_mean": 0.000681489653288736, "clip_ratio/low_mean": 0.0005759707082688692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001257460353372153, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2308.0, "completions/mean_length": 1167.421875, "completions/mean_terminated_length": 615.8859252929688, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 2.440944881889764, "grad_norm": 0.34599801898002625, "learning_rate": 1e-06, "loss": -0.0598, "num_tokens": 153800453.0, "reward": 0.5569196939468384, "reward_std": 0.1539819985628128, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1045 }, { "clip_ratio/high_max": 0.0024830914499034407, "clip_ratio/high_mean": 0.0008351252122338337, "clip_ratio/low_mean": 0.0009217281767632812, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017568534167367034, "epoch": 2.443277923592884, "grad_norm": 0.23306939005851746, "learning_rate": 1e-06, "loss": -0.0601, "step": 1046 }, { "clip_ratio/high_max": 0.002836348914570408, "clip_ratio/high_mean": 0.0009229661163772107, "clip_ratio/low_mean": 0.0010264891225233441, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019494552543619648, "epoch": 2.4456109652960047, "grad_norm": 0.2903509736061096, "learning_rate": 1e-06, "loss": -0.0603, "step": 1047 }, { "clip_ratio/high_max": 0.0025230854234905564, "clip_ratio/high_mean": 0.0007946755479224521, "clip_ratio/low_mean": 0.0012072050776623655, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020018806608277373, "epoch": 2.447944006999125, "grad_norm": 0.23268888890743256, "learning_rate": 1e-06, "loss": -0.0604, "step": 1048 }, { "clip_ratio/high_max": 0.0020995121230953373, "clip_ratio/high_mean": 0.0006992659818934044, "clip_ratio/low_mean": 0.0008514704632034409, "clip_ratio/low_min": 1.7846945411292836e-05, "clip_ratio/region_mean": 0.001550736455101287, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 1065.997802734375, "completions/mean_terminated_length": 606.4344482421875, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 2.4502770487022456, "grad_norm": 0.3484068810939789, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 154357067.0, "reward": 0.5647321939468384, "reward_std": 0.14943771064281464, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 1049 }, { "clip_ratio/high_max": 0.0028557084588101134, "clip_ratio/high_mean": 0.0009366729300381849, "clip_ratio/low_mean": 0.0011126888975923066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002049361872195732, "epoch": 2.452610090405366, "grad_norm": 0.37618282437324524, "learning_rate": 1e-06, "loss": -0.0569, "step": 1050 }, { "clip_ratio/high_max": 0.0027996127391816117, "clip_ratio/high_mean": 0.0009656961301516276, "clip_ratio/low_mean": 0.001358412255285657, "clip_ratio/low_min": 1.5879064449109137e-05, "clip_ratio/region_mean": 0.002324108405446168, "epoch": 2.4549431321084865, "grad_norm": 0.2839452922344208, "learning_rate": 1e-06, "loss": -0.057, "step": 1051 }, { "clip_ratio/high_max": 0.0028057046947651543, "clip_ratio/high_mean": 0.0009376704547321424, "clip_ratio/low_mean": 0.001566952392749954, "clip_ratio/low_min": 1.984126902243588e-05, "clip_ratio/region_mean": 0.0025046228474820964, "epoch": 2.457276173811607, "grad_norm": 0.2504903972148895, "learning_rate": 1e-06, "loss": -0.0571, "step": 1052 }, { "clip_ratio/high_max": 0.0025813447864493355, "clip_ratio/high_mean": 0.0010606997839204269, "clip_ratio/low_mean": 0.0006247893743420718, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016854891619004775, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3553.0, "completions/mean_length": 1123.673095703125, "completions/mean_terminated_length": 632.7945556640625, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 2.4596092155147273, "grad_norm": 0.30488279461860657, "learning_rate": 1e-06, "loss": -0.0629, "num_tokens": 154937918.0, "reward": 0.543526828289032, "reward_std": 0.1814917027950287, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1053 }, { "clip_ratio/high_max": 0.0030096502196101937, "clip_ratio/high_mean": 0.0012082152516086353, "clip_ratio/low_mean": 0.0008828598911350127, "clip_ratio/low_min": 1.0511267646506894e-05, "clip_ratio/region_mean": 0.002091075155476574, "epoch": 2.4619422572178475, "grad_norm": 0.2740449011325836, "learning_rate": 1e-06, "loss": -0.0631, "step": 1054 }, { "clip_ratio/high_max": 0.0032181386195588857, "clip_ratio/high_mean": 0.0013170854817872168, "clip_ratio/low_mean": 0.0009968225913326023, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023139080876717344, "epoch": 2.464275298920968, "grad_norm": 0.2314031422138214, "learning_rate": 1e-06, "loss": -0.0633, "step": 1055 }, { "clip_ratio/high_max": 0.002719180021813372, "clip_ratio/high_mean": 0.001063241807059967, "clip_ratio/low_mean": 0.0011379034131095978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022011452019796707, "epoch": 2.466608340624089, "grad_norm": 0.2776223123073578, "learning_rate": 1e-06, "loss": -0.0633, "step": 1056 }, { "clip_ratio/high_max": 0.00232488347319304, "clip_ratio/high_mean": 0.0007725332707195776, "clip_ratio/low_mean": 0.0004357351265298348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001208268353366293, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3643.0, "completions/mean_length": 1210.6551513671875, "completions/mean_terminated_length": 611.8099365234375, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 2.468941382327209, "grad_norm": 0.33703455328941345, "learning_rate": 1e-06, "loss": -0.0685, "num_tokens": 155480625.0, "reward": 0.5625, "reward_std": 0.13467255234718323, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1057 }, { "clip_ratio/high_max": 0.002764700235275086, "clip_ratio/high_mean": 0.0009506015940132784, "clip_ratio/low_mean": 0.0006605244466300064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016111260156321805, "epoch": 2.4712744240303297, "grad_norm": 0.3677258789539337, "learning_rate": 1e-06, "loss": -0.0688, "step": 1058 }, { "clip_ratio/high_max": 0.0029668719434994273, "clip_ratio/high_mean": 0.0009761436376720667, "clip_ratio/low_mean": 0.0008182404535546084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017943840939551592, "epoch": 2.47360746573345, "grad_norm": 0.29488086700439453, "learning_rate": 1e-06, "loss": -0.0689, "step": 1059 }, { "clip_ratio/high_max": 0.0024077127018244937, "clip_ratio/high_mean": 0.000827830983325839, "clip_ratio/low_mean": 0.001010290815429471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018381218214926776, "epoch": 2.4759405074365706, "grad_norm": 0.22862303256988525, "learning_rate": 1e-06, "loss": -0.0689, "step": 1060 }, { "clip_ratio/high_max": 0.0029381492131506093, "clip_ratio/high_mean": 0.0012637385625566822, "clip_ratio/low_mean": 0.0007906910850579152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020544295985018834, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3153.0, "completions/mean_length": 1159.727783203125, "completions/mean_terminated_length": 588.13330078125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.478273549139691, "grad_norm": 0.3715926706790924, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 156005069.0, "reward": 0.5993303656578064, "reward_std": 0.1802881807088852, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 1061 }, { "clip_ratio/high_max": 0.0034059446115861647, "clip_ratio/high_mean": 0.0013395270325418096, "clip_ratio/low_mean": 0.0010574301250017015, "clip_ratio/low_min": 1.3001872503082268e-05, "clip_ratio/region_mean": 0.002396957170276437, "epoch": 2.4806065908428114, "grad_norm": 0.29294928908348083, "learning_rate": 1e-06, "loss": -0.075, "step": 1062 }, { "clip_ratio/high_max": 0.003073756306548603, "clip_ratio/high_mean": 0.001338158421276603, "clip_ratio/low_mean": 0.0012771279207299813, "clip_ratio/low_min": 2.6003745006164536e-05, "clip_ratio/region_mean": 0.0026152863647439517, "epoch": 2.4829396325459316, "grad_norm": 0.29861995577812195, "learning_rate": 1e-06, "loss": -0.0751, "step": 1063 }, { "clip_ratio/high_max": 0.0038295917911455035, "clip_ratio/high_mean": 0.0014337743923533708, "clip_ratio/low_mean": 0.0014355370567500358, "clip_ratio/low_min": 1.3001872503082268e-05, "clip_ratio/region_mean": 0.0028693113999906927, "epoch": 2.4852726742490523, "grad_norm": 0.27737295627593994, "learning_rate": 1e-06, "loss": -0.0753, "step": 1064 }, { "clip_ratio/high_max": 0.002714105990889948, "clip_ratio/high_mean": 0.0009218911745847436, "clip_ratio/low_mean": 0.0005902984266867861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015121895994525403, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 1139.15625, "completions/mean_terminated_length": 646.3489990234375, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 2.4876057159521725, "grad_norm": 0.2811812162399292, "learning_rate": 1e-06, "loss": -0.0534, "num_tokens": 156587673.0, "reward": 0.5948660969734192, "reward_std": 0.15857158601284027, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 1065 }, { "clip_ratio/high_max": 0.0030347594947670586, "clip_ratio/high_mean": 0.001111539651901694, "clip_ratio/low_mean": 0.0007866604082664708, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018982000910909846, "epoch": 2.489938757655293, "grad_norm": 0.260781854391098, "learning_rate": 1e-06, "loss": -0.0537, "step": 1066 }, { "clip_ratio/high_max": 0.0033222405400010757, "clip_ratio/high_mean": 0.0011177840005984763, "clip_ratio/low_mean": 0.0009584187682776246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020762028070748784, "epoch": 2.4922717993584134, "grad_norm": 0.2341073453426361, "learning_rate": 1e-06, "loss": -0.0539, "step": 1067 }, { "clip_ratio/high_max": 0.002542661844927352, "clip_ratio/high_mean": 0.0010024788025475573, "clip_ratio/low_mean": 0.0011134628221043386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002115941599186044, "epoch": 2.494604841061534, "grad_norm": 0.237702876329422, "learning_rate": 1e-06, "loss": -0.0539, "step": 1068 }, { "clip_ratio/high_max": 0.0029128591049811803, "clip_ratio/high_mean": 0.001122817240684526, "clip_ratio/low_mean": 0.0003881173327044962, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015109345877135638, "completions/clipped_ratio": 0.1964285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3621.0, "completions/mean_length": 1375.216552734375, "completions/mean_terminated_length": 710.1361083984375, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 2.4969378827646542, "grad_norm": 0.30831193923950195, "learning_rate": 1e-06, "loss": -0.0844, "num_tokens": 157197947.0, "reward": 0.5401785969734192, "reward_std": 0.16841016709804535, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 1069 }, { "clip_ratio/high_max": 0.003005316306371242, "clip_ratio/high_mean": 0.0012329331184446346, "clip_ratio/low_mean": 0.0005590850987573504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017920181853696704, "epoch": 2.499270924467775, "grad_norm": 0.29480475187301636, "learning_rate": 1e-06, "loss": -0.0846, "step": 1070 }, { "clip_ratio/high_max": 0.003337475980515592, "clip_ratio/high_mean": 0.0013035204883635743, "clip_ratio/low_mean": 0.0007201540975074749, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020236746131558903, "epoch": 2.5016039661708955, "grad_norm": 0.21608605980873108, "learning_rate": 1e-06, "loss": -0.0848, "step": 1071 }, { "clip_ratio/high_max": 0.0032416925678262487, "clip_ratio/high_mean": 0.001307557919062674, "clip_ratio/low_mean": 0.0008353501461897395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021429080952657387, "epoch": 2.5039370078740157, "grad_norm": 0.2243119776248932, "learning_rate": 1e-06, "loss": -0.0849, "step": 1072 }, { "clip_ratio/high_max": 0.0021912348056503106, "clip_ratio/high_mean": 0.0009612147186999209, "clip_ratio/low_mean": 0.0006734274684276897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00163464221986942, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3212.0, "completions/mean_length": 1191.6373291015625, "completions/mean_terminated_length": 671.9092407226562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.506270049577136, "grad_norm": 0.32219722867012024, "learning_rate": 1e-06, "loss": -0.0749, "num_tokens": 157793438.0, "reward": 0.590401828289032, "reward_std": 0.17484305799007416, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1073 }, { "clip_ratio/high_max": 0.0026715986750787124, "clip_ratio/high_mean": 0.0010693446074583335, "clip_ratio/low_mean": 0.0008631762666482246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001932520914124325, "epoch": 2.5086030912802566, "grad_norm": 0.254000723361969, "learning_rate": 1e-06, "loss": -0.0752, "step": 1074 }, { "clip_ratio/high_max": 0.0026933696353808045, "clip_ratio/high_mean": 0.00115871095113107, "clip_ratio/low_mean": 0.001100592824514024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022593038374907337, "epoch": 2.5109361329833773, "grad_norm": 0.2328997105360031, "learning_rate": 1e-06, "loss": -0.0753, "step": 1075 }, { "clip_ratio/high_max": 0.0028204722548252903, "clip_ratio/high_mean": 0.0010656727863533888, "clip_ratio/low_mean": 0.001168592214526143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022342649899655953, "epoch": 2.5132691746864975, "grad_norm": 0.2636139392852783, "learning_rate": 1e-06, "loss": -0.0753, "step": 1076 }, { "clip_ratio/high_max": 0.0027340222732163966, "clip_ratio/high_mean": 0.0009299906196247321, "clip_ratio/low_mean": 0.0007416015505441464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001671592181082815, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3079.0, "completions/mean_length": 1231.919677734375, "completions/mean_terminated_length": 656.0321655273438, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.515602216389618, "grad_norm": 0.34658893942832947, "learning_rate": 1e-06, "loss": -0.0651, "num_tokens": 158376614.0, "reward": 0.5714285969734192, "reward_std": 0.15890717506408691, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1077 }, { "clip_ratio/high_max": 0.002843072659743484, "clip_ratio/high_mean": 0.0009990428261517081, "clip_ratio/low_mean": 0.0010295792199030984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002028622016950976, "epoch": 2.5179352580927383, "grad_norm": 0.2607556879520416, "learning_rate": 1e-06, "loss": -0.0652, "step": 1078 }, { "clip_ratio/high_max": 0.0030230559350457042, "clip_ratio/high_mean": 0.0010666556881915312, "clip_ratio/low_mean": 0.0012042350135743618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002270890661748126, "epoch": 2.520268299795859, "grad_norm": 0.23274657130241394, "learning_rate": 1e-06, "loss": -0.0654, "step": 1079 }, { "clip_ratio/high_max": 0.0026979696049238555, "clip_ratio/high_mean": 0.0009742539659782778, "clip_ratio/low_mean": 0.0013522894550987985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023265434792847373, "epoch": 2.522601341498979, "grad_norm": 0.2244875580072403, "learning_rate": 1e-06, "loss": -0.0655, "step": 1080 }, { "clip_ratio/high_max": 0.0018652241342351772, "clip_ratio/high_mean": 0.0006941004485270241, "clip_ratio/low_mean": 0.0004151139025907469, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011092143586211023, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3628.0, "completions/mean_length": 1252.193115234375, "completions/mean_terminated_length": 698.5986328125, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 2.5249343832021, "grad_norm": 0.3375288248062134, "learning_rate": 1e-06, "loss": -0.0258, "num_tokens": 159011563.0, "reward": 0.520089328289032, "reward_std": 0.1151694729924202, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1081 }, { "clip_ratio/high_max": 0.002928855452410062, "clip_ratio/high_mean": 0.0008989085581561085, "clip_ratio/low_mean": 0.0005837733792759536, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014826819660811452, "epoch": 2.52726742490522, "grad_norm": 0.2433030605316162, "learning_rate": 1e-06, "loss": -0.0261, "step": 1082 }, { "clip_ratio/high_max": 0.0029415451353997923, "clip_ratio/high_mean": 0.0009354234507554793, "clip_ratio/low_mean": 0.0006812501569584128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001616673598618945, "epoch": 2.5296004666083407, "grad_norm": 0.20506641268730164, "learning_rate": 1e-06, "loss": -0.0263, "step": 1083 }, { "clip_ratio/high_max": 0.0024541875172872096, "clip_ratio/high_mean": 0.0008309054683195427, "clip_ratio/low_mean": 0.0008008740928744373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001631779559829738, "epoch": 2.531933508311461, "grad_norm": 0.21931932866573334, "learning_rate": 1e-06, "loss": -0.0262, "step": 1084 }, { "clip_ratio/high_max": 0.0022768886665289756, "clip_ratio/high_mean": 0.000846750735945534, "clip_ratio/low_mean": 0.0005720474728150293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014187982269504573, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 1088.0703125, "completions/mean_terminated_length": 622.926513671875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.5342665500145816, "grad_norm": 0.370697945356369, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 159590042.0, "reward": 0.5714285969734192, "reward_std": 0.1486460566520691, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1085 }, { "clip_ratio/high_max": 0.002662609243998304, "clip_ratio/high_mean": 0.001032141619361937, "clip_ratio/low_mean": 0.0008312277450386318, "clip_ratio/low_min": 4.931276271236129e-05, "clip_ratio/region_mean": 0.0018633693834999576, "epoch": 2.536599591717702, "grad_norm": 0.29990503191947937, "learning_rate": 1e-06, "loss": -0.064, "step": 1086 }, { "clip_ratio/high_max": 0.0032535009559069294, "clip_ratio/high_mean": 0.0010791864497150527, "clip_ratio/low_mean": 0.0009813344277063152, "clip_ratio/low_min": 4.732724482892081e-05, "clip_ratio/region_mean": 0.0020605208810593467, "epoch": 2.5389326334208224, "grad_norm": 0.23016409575939178, "learning_rate": 1e-06, "loss": -0.0642, "step": 1087 }, { "clip_ratio/high_max": 0.003006082901265472, "clip_ratio/high_mean": 0.0011064850477850996, "clip_ratio/low_mean": 0.001108902984924498, "clip_ratio/low_min": 1.1548410839168355e-05, "clip_ratio/region_mean": 0.0022153881000122055, "epoch": 2.5412656751239426, "grad_norm": 0.2070586383342743, "learning_rate": 1e-06, "loss": -0.0643, "step": 1088 }, { "clip_ratio/high_max": 0.0025503474244032986, "clip_ratio/high_mean": 0.0011560129387362394, "clip_ratio/low_mean": 0.0007998784167284612, "clip_ratio/low_min": 2.5105442546191625e-05, "clip_ratio/region_mean": 0.0019558913918444887, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2836.0, "completions/mean_length": 1123.438720703125, "completions/mean_terminated_length": 650.4437255859375, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 2.5435987168270633, "grad_norm": 0.3656218349933624, "learning_rate": 1e-06, "loss": -0.0502, "num_tokens": 160174971.0, "reward": 0.5535714626312256, "reward_std": 0.197794571518898, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 1089 }, { "clip_ratio/high_max": 0.002961598554975353, "clip_ratio/high_mean": 0.001348602381767705, "clip_ratio/low_mean": 0.0010555532207945362, "clip_ratio/low_min": 2.5105442546191625e-05, "clip_ratio/region_mean": 0.0024041556171141565, "epoch": 2.545931758530184, "grad_norm": 0.3478671908378601, "learning_rate": 1e-06, "loss": -0.0506, "step": 1090 }, { "clip_ratio/high_max": 0.0028374877947499044, "clip_ratio/high_mean": 0.0013154700263839914, "clip_ratio/low_mean": 0.0012582149520312669, "clip_ratio/low_min": 2.5105442546191625e-05, "clip_ratio/region_mean": 0.0025736849784152582, "epoch": 2.548264800233304, "grad_norm": 0.2995145618915558, "learning_rate": 1e-06, "loss": -0.0508, "step": 1091 }, { "clip_ratio/high_max": 0.0030797913859714754, "clip_ratio/high_mean": 0.0012829393544961931, "clip_ratio/low_mean": 0.0015215690691547934, "clip_ratio/low_min": 1.4302059753390495e-05, "clip_ratio/region_mean": 0.0028045084109180607, "epoch": 2.5505978419364244, "grad_norm": 0.33942118287086487, "learning_rate": 1e-06, "loss": -0.051, "step": 1092 }, { "clip_ratio/high_max": 0.0027154575218446553, "clip_ratio/high_mean": 0.0009403135609318269, "clip_ratio/low_mean": 0.0004383714103823877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013786849558528047, "completions/clipped_ratio": 0.1774553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3796.0, "completions/mean_length": 1330.404052734375, "completions/mean_terminated_length": 733.7557983398438, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.552930883639545, "grad_norm": 0.3024427592754364, "learning_rate": 1e-06, "loss": -0.0898, "num_tokens": 160800557.0, "reward": 0.53125, "reward_std": 0.1597309112548828, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1093 }, { "clip_ratio/high_max": 0.003726006718352437, "clip_ratio/high_mean": 0.0012743875086016487, "clip_ratio/low_mean": 0.0005434095000964589, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018177969686803408, "epoch": 2.5552639253426657, "grad_norm": 0.25026634335517883, "learning_rate": 1e-06, "loss": -0.0899, "step": 1094 }, { "clip_ratio/high_max": 0.0036933976589352824, "clip_ratio/high_mean": 0.0012457342709240038, "clip_ratio/low_mean": 0.0006731112216584734, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019188454753020778, "epoch": 2.557596967045786, "grad_norm": 0.23310990631580353, "learning_rate": 1e-06, "loss": -0.0901, "step": 1095 }, { "clip_ratio/high_max": 0.003475955832982436, "clip_ratio/high_mean": 0.0011531089658092242, "clip_ratio/low_mean": 0.0007925101235741749, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019456190675555263, "epoch": 2.5599300087489065, "grad_norm": 0.24037988483905792, "learning_rate": 1e-06, "loss": -0.0902, "step": 1096 }, { "clip_ratio/high_max": 0.0025270912956329994, "clip_ratio/high_mean": 0.0011240604162594536, "clip_ratio/low_mean": 0.0009068060808203882, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020308665043557994, "completions/clipped_ratio": 0.1774553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3769.0, "completions/mean_length": 1283.2578125, "completions/mean_terminated_length": 676.4382934570312, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.5622630504520267, "grad_norm": 0.35510119795799255, "learning_rate": 1e-06, "loss": -0.0802, "num_tokens": 161386172.0, "reward": 0.5178571939468384, "reward_std": 0.18468938767910004, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1097 }, { "clip_ratio/high_max": 0.003006246748554986, "clip_ratio/high_mean": 0.0013421027542790398, "clip_ratio/low_mean": 0.0010785836548166117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002420686447294429, "epoch": 2.5645960921551474, "grad_norm": 0.37266677618026733, "learning_rate": 1e-06, "loss": -0.0804, "step": 1098 }, { "clip_ratio/high_max": 0.002982464611704927, "clip_ratio/high_mean": 0.0012667034025071189, "clip_ratio/low_mean": 0.0013548635179176927, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002621566949528642, "epoch": 2.5669291338582676, "grad_norm": 0.23731666803359985, "learning_rate": 1e-06, "loss": -0.0806, "step": 1099 }, { "clip_ratio/high_max": 0.00288599579653237, "clip_ratio/high_mean": 0.0012670163796428824, "clip_ratio/low_mean": 0.001627523721253965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002894540004490409, "epoch": 2.5692621755613883, "grad_norm": 0.2168024182319641, "learning_rate": 1e-06, "loss": -0.0808, "step": 1100 }, { "clip_ratio/high_max": 0.002326938461919781, "clip_ratio/high_mean": 0.0009346261258542654, "clip_ratio/low_mean": 0.0005173175809431996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014519437027047388, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2913.0, "completions/mean_length": 1109.4710693359375, "completions/mean_terminated_length": 674.0946044921875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.5715952172645085, "grad_norm": 0.3829411268234253, "learning_rate": 1e-06, "loss": -0.0594, "num_tokens": 162010586.0, "reward": 0.6383928656578064, "reward_std": 0.1537972390651703, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 1101 }, { "clip_ratio/high_max": 0.0024993760089273565, "clip_ratio/high_mean": 0.0010787901337607764, "clip_ratio/low_mean": 0.0007505401317757787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018293302418896928, "epoch": 2.573928258967629, "grad_norm": 0.2564522325992584, "learning_rate": 1e-06, "loss": -0.0596, "step": 1102 }, { "clip_ratio/high_max": 0.0030906805586710107, "clip_ratio/high_mean": 0.0011437192715675337, "clip_ratio/low_mean": 0.0009046731920534512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020483924672589637, "epoch": 2.5762613006707493, "grad_norm": 0.20611624419689178, "learning_rate": 1e-06, "loss": -0.0598, "step": 1103 }, { "clip_ratio/high_max": 0.0025507345417281613, "clip_ratio/high_mean": 0.0010343282538087806, "clip_ratio/low_mean": 0.0010219057076028548, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020562340068863705, "epoch": 2.57859434237387, "grad_norm": 0.30897054076194763, "learning_rate": 1e-06, "loss": -0.0599, "step": 1104 }, { "clip_ratio/high_max": 0.0027421125705586746, "clip_ratio/high_mean": 0.0010197161354881246, "clip_ratio/low_mean": 0.0004768837093251932, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001496599848906044, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3928.0, "completions/mean_length": 1054.1317138671875, "completions/mean_terminated_length": 597.2657470703125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.5809273840769906, "grad_norm": 0.32677143812179565, "learning_rate": 1e-06, "loss": -0.0551, "num_tokens": 162562792.0, "reward": 0.5959821939468384, "reward_std": 0.15357083082199097, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 1105 }, { "clip_ratio/high_max": 0.0025771559012355283, "clip_ratio/high_mean": 0.0010178010234085377, "clip_ratio/low_mean": 0.000736704959308554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017545059599797241, "epoch": 2.583260425780111, "grad_norm": 0.27515554428100586, "learning_rate": 1e-06, "loss": -0.0553, "step": 1106 }, { "clip_ratio/high_max": 0.0029067202340229414, "clip_ratio/high_mean": 0.0011057470001105685, "clip_ratio/low_mean": 0.000855755053635221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001961502101039514, "epoch": 2.585593467483231, "grad_norm": 0.22235415875911713, "learning_rate": 1e-06, "loss": -0.0555, "step": 1107 }, { "clip_ratio/high_max": 0.0027928195049753413, "clip_ratio/high_mean": 0.0010616704385029152, "clip_ratio/low_mean": 0.0010224524312434369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002084122876112815, "epoch": 2.5879265091863517, "grad_norm": 0.27045929431915283, "learning_rate": 1e-06, "loss": -0.0555, "step": 1108 }, { "clip_ratio/high_max": 0.002150590888049919, "clip_ratio/high_mean": 0.0010427422366774408, "clip_ratio/low_mean": 0.000490098445879994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015328407025663182, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3760.0, "completions/mean_length": 1091.25, "completions/mean_terminated_length": 657.6142578125, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 2.5902595508894724, "grad_norm": 0.4157736897468567, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 163170296.0, "reward": 0.5647321939468384, "reward_std": 0.16578474640846252, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 1109 }, { "clip_ratio/high_max": 0.0026978680325555615, "clip_ratio/high_mean": 0.0011257234727963805, "clip_ratio/low_mean": 0.0007382088842859957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018639323971001431, "epoch": 2.5925925925925926, "grad_norm": 0.25867852568626404, "learning_rate": 1e-06, "loss": -0.0603, "step": 1110 }, { "clip_ratio/high_max": 0.0028489320684457198, "clip_ratio/high_mean": 0.0012609830337169115, "clip_ratio/low_mean": 0.0008413531631958904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002102336278767325, "epoch": 2.5949256342957128, "grad_norm": 0.25433149933815, "learning_rate": 1e-06, "loss": -0.0605, "step": 1111 }, { "clip_ratio/high_max": 0.0024162857080227695, "clip_ratio/high_mean": 0.0010507349834369961, "clip_ratio/low_mean": 0.0009205314836435718, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019712664870894514, "epoch": 2.5972586759988334, "grad_norm": 0.2389124184846878, "learning_rate": 1e-06, "loss": -0.0605, "step": 1112 }, { "clip_ratio/high_max": 0.00232755062461365, "clip_ratio/high_mean": 0.0009195824804919539, "clip_ratio/low_mean": 0.0005153276651981287, "clip_ratio/low_min": 1.2817883543903008e-05, "clip_ratio/region_mean": 0.001434910129319178, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3426.0, "completions/mean_length": 1138.8426513671875, "completions/mean_terminated_length": 618.8175659179688, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.599591717701954, "grad_norm": 0.33814549446105957, "learning_rate": 1e-06, "loss": -0.0936, "num_tokens": 163728859.0, "reward": 0.6272321939468384, "reward_std": 0.1795371025800705, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1113 }, { "clip_ratio/high_max": 0.0033839424868347123, "clip_ratio/high_mean": 0.0012554665518109687, "clip_ratio/low_mean": 0.0007289757140824804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00198444229317829, "epoch": 2.6019247594050743, "grad_norm": 0.25683408975601196, "learning_rate": 1e-06, "loss": -0.0939, "step": 1114 }, { "clip_ratio/high_max": 0.002822003727487754, "clip_ratio/high_mean": 0.001149767565948423, "clip_ratio/low_mean": 0.000779302723458386, "clip_ratio/low_min": 1.2332280675764196e-05, "clip_ratio/region_mean": 0.0019290702839498408, "epoch": 2.604257801108195, "grad_norm": 0.26113370060920715, "learning_rate": 1e-06, "loss": -0.094, "step": 1115 }, { "clip_ratio/high_max": 0.002664569779881276, "clip_ratio/high_mean": 0.0010907177729677642, "clip_ratio/low_mean": 0.0010436815664434107, "clip_ratio/low_min": 2.466456135152839e-05, "clip_ratio/region_mean": 0.002134399299393408, "epoch": 2.606590842811315, "grad_norm": 0.21102070808410645, "learning_rate": 1e-06, "loss": -0.0941, "step": 1116 }, { "clip_ratio/high_max": 0.0020429506421351107, "clip_ratio/high_mean": 0.0006444957043640898, "clip_ratio/low_mean": 0.000556617698066475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012011134040221805, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 1052.1273193359375, "completions/mean_terminated_length": 630.5488891601562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 2.608923884514436, "grad_norm": 0.3030131161212921, "learning_rate": 1e-06, "loss": -0.0359, "num_tokens": 164309485.0, "reward": 0.5524553656578064, "reward_std": 0.13876289129257202, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 1117 }, { "clip_ratio/high_max": 0.0025315484544989886, "clip_ratio/high_mean": 0.0008370420373466914, "clip_ratio/low_mean": 0.0007493260559385817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001586368085554568, "epoch": 2.611256926217556, "grad_norm": 0.25326409935951233, "learning_rate": 1e-06, "loss": -0.036, "step": 1118 }, { "clip_ratio/high_max": 0.0025373918942932505, "clip_ratio/high_mean": 0.00083686521384152, "clip_ratio/low_mean": 0.001015362488487881, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018522277350712102, "epoch": 2.6135899679206767, "grad_norm": 0.2460647076368332, "learning_rate": 1e-06, "loss": -0.0362, "step": 1119 }, { "clip_ratio/high_max": 0.002477307054505218, "clip_ratio/high_mean": 0.0008151182400979451, "clip_ratio/low_mean": 0.0010352167614655627, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018503350001992658, "epoch": 2.615923009623797, "grad_norm": 0.24024319648742676, "learning_rate": 1e-06, "loss": -0.0363, "step": 1120 }, { "clip_ratio/high_max": 0.0021482690281118266, "clip_ratio/high_mean": 0.0008689304322615499, "clip_ratio/low_mean": 0.0005232768307905644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013922072866989765, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3888.0, "completions/mean_length": 996.0848388671875, "completions/mean_terminated_length": 602.2590942382812, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 2.6182560513269175, "grad_norm": 0.2719813585281372, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 164869201.0, "reward": 0.660714328289032, "reward_std": 0.15390713512897491, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1121 }, { "clip_ratio/high_max": 0.002664324529177975, "clip_ratio/high_mean": 0.0010516172642383026, "clip_ratio/low_mean": 0.0006718581489622011, "clip_ratio/low_min": 1.4859724615234882e-05, "clip_ratio/region_mean": 0.001723475430480903, "epoch": 2.6205890930300377, "grad_norm": 0.3058817982673645, "learning_rate": 1e-06, "loss": -0.0493, "step": 1122 }, { "clip_ratio/high_max": 0.0028086251113563776, "clip_ratio/high_mean": 0.001154651603428647, "clip_ratio/low_mean": 0.0008613234749645926, "clip_ratio/low_min": 2.2080905182519928e-05, "clip_ratio/region_mean": 0.002015975085669197, "epoch": 2.6229221347331584, "grad_norm": 0.19979745149612427, "learning_rate": 1e-06, "loss": -0.0496, "step": 1123 }, { "clip_ratio/high_max": 0.0024760094711382408, "clip_ratio/high_mean": 0.0009517693533780403, "clip_ratio/low_mean": 0.00111147116876964, "clip_ratio/low_min": 1.1040452591259964e-05, "clip_ratio/region_mean": 0.0020632405357901007, "epoch": 2.625255176436279, "grad_norm": 0.2403215914964676, "learning_rate": 1e-06, "loss": -0.0496, "step": 1124 }, { "clip_ratio/high_max": 0.0027720848738681525, "clip_ratio/high_mean": 0.0009584093750163447, "clip_ratio/low_mean": 0.000537255859853758, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014956652375985868, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3061.0, "completions/mean_length": 1032.552490234375, "completions/mean_terminated_length": 630.2815551757812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.6275882181393992, "grad_norm": 0.2966957688331604, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 165453048.0, "reward": 0.6517857313156128, "reward_std": 0.17476709187030792, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1125 }, { "clip_ratio/high_max": 0.00300396719831042, "clip_ratio/high_mean": 0.001154687659436604, "clip_ratio/low_mean": 0.0008199599869840313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001974647617316805, "epoch": 2.6299212598425195, "grad_norm": 0.25790533423423767, "learning_rate": 1e-06, "loss": -0.0612, "step": 1126 }, { "clip_ratio/high_max": 0.0032114461428136565, "clip_ratio/high_mean": 0.0012655668942898046, "clip_ratio/low_mean": 0.0009432689948880579, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022088358527980745, "epoch": 2.63225430154564, "grad_norm": 0.21397745609283447, "learning_rate": 1e-06, "loss": -0.0614, "step": 1127 }, { "clip_ratio/high_max": 0.0031452557304874063, "clip_ratio/high_mean": 0.001191447696328396, "clip_ratio/low_mean": 0.001080630059732357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022720778069924563, "epoch": 2.6345873432487608, "grad_norm": 0.25344526767730713, "learning_rate": 1e-06, "loss": -0.0614, "step": 1128 }, { "clip_ratio/high_max": 0.0022820700614829548, "clip_ratio/high_mean": 0.0009935147118085297, "clip_ratio/low_mean": 0.0007081885323714232, "clip_ratio/low_min": 4.7674659072072245e-05, "clip_ratio/region_mean": 0.0017017032078001648, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3312.0, "completions/mean_length": 1139.97216796875, "completions/mean_terminated_length": 615.5781860351562, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 2.636920384951881, "grad_norm": 0.32561057806015015, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 166002015.0, "reward": 0.5837053656578064, "reward_std": 0.1879894882440567, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1129 }, { "clip_ratio/high_max": 0.002762396055913996, "clip_ratio/high_mean": 0.001226898548338795, "clip_ratio/low_mean": 0.000930823360249633, "clip_ratio/low_min": 7.519316386606079e-05, "clip_ratio/region_mean": 0.0021577219049504492, "epoch": 2.6392534266550016, "grad_norm": 0.25686073303222656, "learning_rate": 1e-06, "loss": -0.0589, "step": 1130 }, { "clip_ratio/high_max": 0.0028286995220696554, "clip_ratio/high_mean": 0.0012117004789615748, "clip_ratio/low_mean": 0.0010972727650369052, "clip_ratio/low_min": 3.666993507067673e-05, "clip_ratio/region_mean": 0.002308973234903533, "epoch": 2.641586468358122, "grad_norm": 0.2529727518558502, "learning_rate": 1e-06, "loss": -0.0591, "step": 1131 }, { "clip_ratio/high_max": 0.0026503606131882407, "clip_ratio/high_mean": 0.0011134245469293091, "clip_ratio/low_mean": 0.0013780929548374843, "clip_ratio/low_min": 9.493100515101105e-05, "clip_ratio/region_mean": 0.0024915175017667934, "epoch": 2.6439195100612425, "grad_norm": 0.25941675901412964, "learning_rate": 1e-06, "loss": -0.0592, "step": 1132 }, { "clip_ratio/high_max": 0.0023725707796984352, "clip_ratio/high_mean": 0.0009655496851337375, "clip_ratio/low_mean": 0.0005347466503735632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015002963627921417, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 982.0357666015625, "completions/mean_terminated_length": 599.6190185546875, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 2.6462525517643627, "grad_norm": 0.2986929714679718, "learning_rate": 1e-06, "loss": -0.0672, "num_tokens": 166575767.0, "reward": 0.6194196939468384, "reward_std": 0.189485564827919, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1133 }, { "clip_ratio/high_max": 0.0027116593992104754, "clip_ratio/high_mean": 0.0011672088367049582, "clip_ratio/low_mean": 0.0007560647736681858, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001923273601278197, "epoch": 2.6485855934674833, "grad_norm": 0.272161066532135, "learning_rate": 1e-06, "loss": -0.0675, "step": 1134 }, { "clip_ratio/high_max": 0.002445779689878691, "clip_ratio/high_mean": 0.0011025048988813069, "clip_ratio/low_mean": 0.0009129698701144662, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002015474768995773, "epoch": 2.6509186351706036, "grad_norm": 0.204036146402359, "learning_rate": 1e-06, "loss": -0.0676, "step": 1135 }, { "clip_ratio/high_max": 0.0024223813525168225, "clip_ratio/high_mean": 0.0011369093772373162, "clip_ratio/low_mean": 0.0010873437604459468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022242531340452842, "epoch": 2.653251676873724, "grad_norm": 0.2310946136713028, "learning_rate": 1e-06, "loss": -0.0677, "step": 1136 }, { "clip_ratio/high_max": 0.0019093765258730855, "clip_ratio/high_mean": 0.0007528779151471099, "clip_ratio/low_mean": 0.0005131302204972599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00126600810472155, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4006.0, "completions/mean_length": 1196.7523193359375, "completions/mean_terminated_length": 655.3033447265625, "completions/min_length": 224.0, "completions/min_terminated_length": 224.0, "epoch": 2.6555847185768444, "grad_norm": 0.2802563011646271, "learning_rate": 1e-06, "loss": -0.056, "num_tokens": 167160089.0, "reward": 0.5691964626312256, "reward_std": 0.15916569530963898, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1137 }, { "clip_ratio/high_max": 0.0025058464889298193, "clip_ratio/high_mean": 0.0009987526918848744, "clip_ratio/low_mean": 0.0006417501290343353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016405027927248739, "epoch": 2.657917760279965, "grad_norm": 0.23167243599891663, "learning_rate": 1e-06, "loss": -0.0562, "step": 1138 }, { "clip_ratio/high_max": 0.002632463161717169, "clip_ratio/high_mean": 0.000962874670221936, "clip_ratio/low_mean": 0.0008900784996512812, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018529531444073655, "epoch": 2.6602508019830857, "grad_norm": 0.32529324293136597, "learning_rate": 1e-06, "loss": -0.0564, "step": 1139 }, { "clip_ratio/high_max": 0.0023656452540308237, "clip_ratio/high_mean": 0.0009692981020634761, "clip_ratio/low_mean": 0.0009637194434617413, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019330175855429843, "epoch": 2.662583843686206, "grad_norm": 0.27710026502609253, "learning_rate": 1e-06, "loss": -0.0564, "step": 1140 }, { "clip_ratio/high_max": 0.0022393335384549573, "clip_ratio/high_mean": 0.0007632964770891704, "clip_ratio/low_mean": 0.0005693107414117549, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013326072366908193, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3621.0, "completions/mean_length": 1360.2723388671875, "completions/mean_terminated_length": 742.7688598632812, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 2.664916885389326, "grad_norm": 0.2509084641933441, "learning_rate": 1e-06, "loss": -0.0584, "num_tokens": 167792013.0, "reward": 0.463169664144516, "reward_std": 0.16214200854301453, "rewards/verify_math_reward/mean": 0.4631696343421936, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 1141 }, { "clip_ratio/high_max": 0.0027459394914330915, "clip_ratio/high_mean": 0.0010035582890850492, "clip_ratio/low_mean": 0.0008170855089701945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018206437562184874, "epoch": 2.667249927092447, "grad_norm": 0.22688522934913635, "learning_rate": 1e-06, "loss": -0.0586, "step": 1142 }, { "clip_ratio/high_max": 0.0028888552769785747, "clip_ratio/high_mean": 0.0009991747792810202, "clip_ratio/low_mean": 0.0009259738062610268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019251485900895204, "epoch": 2.6695829687955674, "grad_norm": 0.22150474786758423, "learning_rate": 1e-06, "loss": -0.0587, "step": 1143 }, { "clip_ratio/high_max": 0.002439021052850876, "clip_ratio/high_mean": 0.0009087577436730498, "clip_ratio/low_mean": 0.0010164745017391397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019252322454121895, "epoch": 2.6719160104986877, "grad_norm": 0.20555339753627777, "learning_rate": 1e-06, "loss": -0.0588, "step": 1144 }, { "clip_ratio/high_max": 0.0018441054489812814, "clip_ratio/high_mean": 0.0007074962049955502, "clip_ratio/low_mean": 0.0005324415769791813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001239937777427258, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 1170.54248046875, "completions/mean_terminated_length": 682.9661865234375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 2.674249052201808, "grad_norm": 0.2705370783805847, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 168411147.0, "reward": 0.5089285969734192, "reward_std": 0.14042216539382935, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 1145 }, { "clip_ratio/high_max": 0.002201586488808971, "clip_ratio/high_mean": 0.0008728033153602155, "clip_ratio/low_mean": 0.0007337162696785526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016065196141425986, "epoch": 2.6765820939049285, "grad_norm": 0.24536322057247162, "learning_rate": 1e-06, "loss": -0.0645, "step": 1146 }, { "clip_ratio/high_max": 0.002139316660759505, "clip_ratio/high_mean": 0.0007986662731127581, "clip_ratio/low_mean": 0.0007943360560602741, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015930023946566507, "epoch": 2.678915135608049, "grad_norm": 0.24811631441116333, "learning_rate": 1e-06, "loss": -0.0646, "step": 1147 }, { "clip_ratio/high_max": 0.0023903483706817497, "clip_ratio/high_mean": 0.0008260114354925463, "clip_ratio/low_mean": 0.0009675408273324138, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017935522046172991, "epoch": 2.6812481773111694, "grad_norm": 0.2024756819009781, "learning_rate": 1e-06, "loss": -0.0647, "step": 1148 }, { "clip_ratio/high_max": 0.0022558805030712392, "clip_ratio/high_mean": 0.0008015481953407289, "clip_ratio/low_mean": 0.0005789182469015941, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001380466444970807, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 1187.8560791015625, "completions/mean_terminated_length": 698.7418212890625, "completions/min_length": 227.0, "completions/min_terminated_length": 227.0, "epoch": 2.68358121901429, "grad_norm": 0.28669261932373047, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 169031930.0, "reward": 0.5457589626312256, "reward_std": 0.15522870421409607, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 1149 }, { "clip_ratio/high_max": 0.0021396095071395393, "clip_ratio/high_mean": 0.0008520158498868113, "clip_ratio/low_mean": 0.0006523220808958285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015043379098642617, "epoch": 2.6859142607174102, "grad_norm": 0.2215685099363327, "learning_rate": 1e-06, "loss": -0.0455, "step": 1150 }, { "clip_ratio/high_max": 0.0026519415332586505, "clip_ratio/high_mean": 0.0009802410786505789, "clip_ratio/low_mean": 0.000823186988782254, "clip_ratio/low_min": 1.5277439160854556e-05, "clip_ratio/region_mean": 0.001803428036510013, "epoch": 2.688247302420531, "grad_norm": 0.24253995716571808, "learning_rate": 1e-06, "loss": -0.0457, "step": 1151 }, { "clip_ratio/high_max": 0.002195755841967184, "clip_ratio/high_mean": 0.0008404661348322406, "clip_ratio/low_mean": 0.0009513325512671145, "clip_ratio/low_min": 4.391512266010977e-05, "clip_ratio/region_mean": 0.001791798677004408, "epoch": 2.690580344123651, "grad_norm": 0.20786729454994202, "learning_rate": 1e-06, "loss": -0.0458, "step": 1152 }, { "clip_ratio/high_max": 0.002137276365829166, "clip_ratio/high_mean": 0.0008418489796895301, "clip_ratio/low_mean": 0.00039431780010090733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012361667722871061, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3274.0, "completions/mean_length": 1187.6429443359375, "completions/mean_terminated_length": 689.6104736328125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.6929133858267718, "grad_norm": 0.26343563199043274, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 169643770.0, "reward": 0.5703125, "reward_std": 0.1406080424785614, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1153 }, { "clip_ratio/high_max": 0.0022794629840063863, "clip_ratio/high_mean": 0.0009593052564014215, "clip_ratio/low_mean": 0.0005149428202457784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014742480670975056, "epoch": 2.695246427529892, "grad_norm": 0.21706290543079376, "learning_rate": 1e-06, "loss": -0.0644, "step": 1154 }, { "clip_ratio/high_max": 0.0026719845554907806, "clip_ratio/high_mean": 0.00097825049306266, "clip_ratio/low_mean": 0.0005811857872686232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015594363103446085, "epoch": 2.6975794692330126, "grad_norm": 0.45889315009117126, "learning_rate": 1e-06, "loss": -0.0645, "step": 1155 }, { "clip_ratio/high_max": 0.0024308937645400874, "clip_ratio/high_mean": 0.0009150312143901829, "clip_ratio/low_mean": 0.0007390931596091832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016541243676329032, "epoch": 2.699912510936133, "grad_norm": 0.20172478258609772, "learning_rate": 1e-06, "loss": -0.0646, "step": 1156 }, { "clip_ratio/high_max": 0.001911978415591875, "clip_ratio/high_mean": 0.000815377288745367, "clip_ratio/low_mean": 0.0006175725966386381, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014329498771985527, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3168.0, "completions/mean_length": 1103.0257568359375, "completions/mean_terminated_length": 631.264892578125, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 2.7022455526392535, "grad_norm": 0.3399980962276459, "learning_rate": 1e-06, "loss": -0.0641, "num_tokens": 170219817.0, "reward": 0.6183035969734192, "reward_std": 0.15819111466407776, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1157 }, { "clip_ratio/high_max": 0.002753663487965241, "clip_ratio/high_mean": 0.0010733545605035033, "clip_ratio/low_mean": 0.000898408230568748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019717628456419334, "epoch": 2.704578594342374, "grad_norm": 0.285305917263031, "learning_rate": 1e-06, "loss": -0.0643, "step": 1158 }, { "clip_ratio/high_max": 0.002390883913903963, "clip_ratio/high_mean": 0.0009104770833801012, "clip_ratio/low_mean": 0.0010506581602385268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001961135247256607, "epoch": 2.7069116360454943, "grad_norm": 0.2516597807407379, "learning_rate": 1e-06, "loss": -0.0644, "step": 1159 }, { "clip_ratio/high_max": 0.0023764417856000364, "clip_ratio/high_mean": 0.0008862508093443466, "clip_ratio/low_mean": 0.0012949681567988591, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002181219002522994, "epoch": 2.7092446777486145, "grad_norm": 0.2154473513364792, "learning_rate": 1e-06, "loss": -0.0646, "step": 1160 }, { "clip_ratio/high_max": 0.0021184427678235807, "clip_ratio/high_mean": 0.0007785532925481675, "clip_ratio/low_mean": 0.0004812925199075835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012598458342836238, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 987.177490234375, "completions/mean_terminated_length": 652.8541259765625, "completions/min_length": 204.0, "completions/min_terminated_length": 204.0, "epoch": 2.711577719451735, "grad_norm": 0.2807345390319824, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 170838272.0, "reward": 0.5803571939468384, "reward_std": 0.14004239439964294, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 1161 }, { "clip_ratio/high_max": 0.0022629649320151657, "clip_ratio/high_mean": 0.0008516832112945849, "clip_ratio/low_mean": 0.0006314157672022702, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014830990257905796, "epoch": 2.713910761154856, "grad_norm": 0.2561696469783783, "learning_rate": 1e-06, "loss": -0.0353, "step": 1162 }, { "clip_ratio/high_max": 0.002565336508268956, "clip_ratio/high_mean": 0.0009492197441431927, "clip_ratio/low_mean": 0.0007205124074971536, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016697321334504522, "epoch": 2.716243802857976, "grad_norm": 0.202647864818573, "learning_rate": 1e-06, "loss": -0.0354, "step": 1163 }, { "clip_ratio/high_max": 0.002372305709286593, "clip_ratio/high_mean": 0.0009050377866515191, "clip_ratio/low_mean": 0.0009203308145515621, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018253686212119646, "epoch": 2.7185768445610963, "grad_norm": 0.1613180786371231, "learning_rate": 1e-06, "loss": -0.0355, "step": 1164 }, { "clip_ratio/high_max": 0.002923695297795348, "clip_ratio/high_mean": 0.0010887382250075461, "clip_ratio/low_mean": 0.0005822753610118525, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001671013578743441, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3938.0, "completions/mean_length": 1305.7154541015625, "completions/mean_terminated_length": 661.8035888671875, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 2.720909886264217, "grad_norm": 0.34382542967796326, "learning_rate": 1e-06, "loss": -0.0553, "num_tokens": 171422433.0, "reward": 0.5245535969734192, "reward_std": 0.1658179610967636, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 1165 }, { "clip_ratio/high_max": 0.00350352305395063, "clip_ratio/high_mean": 0.0011558006335690152, "clip_ratio/low_mean": 0.00077688737110293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019326879591972101, "epoch": 2.7232429279673376, "grad_norm": 0.29401642084121704, "learning_rate": 1e-06, "loss": -0.0555, "step": 1166 }, { "clip_ratio/high_max": 0.0030248135080910288, "clip_ratio/high_mean": 0.0012315093026700197, "clip_ratio/low_mean": 0.0010095744673890295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002241083755507134, "epoch": 2.725575969670458, "grad_norm": 0.2268916368484497, "learning_rate": 1e-06, "loss": -0.0557, "step": 1167 }, { "clip_ratio/high_max": 0.003271043417043984, "clip_ratio/high_mean": 0.0011021570935554337, "clip_ratio/low_mean": 0.0012381917695165612, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023403488739859313, "epoch": 2.7279090113735784, "grad_norm": 0.2651258111000061, "learning_rate": 1e-06, "loss": -0.0558, "step": 1168 }, { "clip_ratio/high_max": 0.0018323434414924122, "clip_ratio/high_mean": 0.0007325293281610357, "clip_ratio/low_mean": 0.0006358508017001441, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001368380144413095, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3717.0, "completions/mean_length": 1216.068115234375, "completions/mean_terminated_length": 714.0616455078125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 2.7302420530766986, "grad_norm": 0.2982531189918518, "learning_rate": 1e-06, "loss": -0.0591, "num_tokens": 172058222.0, "reward": 0.5111607313156128, "reward_std": 0.18160229921340942, "rewards/verify_math_reward/mean": 0.5111607313156128, "rewards/verify_math_reward/std": 0.5001546144485474, "step": 1169 }, { "clip_ratio/high_max": 0.0024237072066171095, "clip_ratio/high_mean": 0.0009196715491270879, "clip_ratio/low_mean": 0.0009615821636543842, "clip_ratio/low_min": 6.521401428472018e-05, "clip_ratio/region_mean": 0.001881253694591578, "epoch": 2.7325750947798193, "grad_norm": 0.2623019516468048, "learning_rate": 1e-06, "loss": -0.0595, "step": 1170 }, { "clip_ratio/high_max": 0.0023440176228177734, "clip_ratio/high_mean": 0.0008444089853583137, "clip_ratio/low_mean": 0.0010867597684409702, "clip_ratio/low_min": 5.1829283620463684e-05, "clip_ratio/region_mean": 0.001931168750161305, "epoch": 2.7349081364829395, "grad_norm": 0.26250067353248596, "learning_rate": 1e-06, "loss": -0.0595, "step": 1171 }, { "clip_ratio/high_max": 0.002036090547335334, "clip_ratio/high_mean": 0.0008630632728454657, "clip_ratio/low_mean": 0.0012405992401909316, "clip_ratio/low_min": 8.95601797310519e-05, "clip_ratio/region_mean": 0.002103662511217408, "epoch": 2.73724117818606, "grad_norm": 0.21721115708351135, "learning_rate": 1e-06, "loss": -0.0596, "step": 1172 }, { "clip_ratio/high_max": 0.0025588344724383205, "clip_ratio/high_mean": 0.0009905137867463054, "clip_ratio/low_mean": 0.00043740399178204825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014279178067226894, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3473.0, "completions/mean_length": 1183.5904541015625, "completions/mean_terminated_length": 625.8948974609375, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 2.7395742198891804, "grad_norm": 0.3492778539657593, "learning_rate": 1e-06, "loss": -0.0756, "num_tokens": 172629727.0, "reward": 0.5535714626312256, "reward_std": 0.14661546051502228, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 1173 }, { "clip_ratio/high_max": 0.003237783705117181, "clip_ratio/high_mean": 0.001139278729169746, "clip_ratio/low_mean": 0.0006850259969723993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018243047306896187, "epoch": 2.741907261592301, "grad_norm": 0.33572831749916077, "learning_rate": 1e-06, "loss": -0.0758, "step": 1174 }, { "clip_ratio/high_max": 0.002890553376346361, "clip_ratio/high_mean": 0.0011176306452398421, "clip_ratio/low_mean": 0.0008286535630759317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019462841810309328, "epoch": 2.7442403032954212, "grad_norm": 0.21871332824230194, "learning_rate": 1e-06, "loss": -0.076, "step": 1175 }, { "clip_ratio/high_max": 0.0031910955658531748, "clip_ratio/high_mean": 0.001188287607874372, "clip_ratio/low_mean": 0.0009394897433594451, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021277773412293755, "epoch": 2.746573344998542, "grad_norm": 0.2713559865951538, "learning_rate": 1e-06, "loss": -0.0762, "step": 1176 }, { "clip_ratio/high_max": 0.002106075917254202, "clip_ratio/high_mean": 0.000794560561189428, "clip_ratio/low_mean": 0.0007055580717860721, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001500118654803373, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 1118.3046875, "completions/mean_terminated_length": 671.0770263671875, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 2.7489063867016625, "grad_norm": 0.356035053730011, "learning_rate": 1e-06, "loss": -0.0457, "num_tokens": 173238416.0, "reward": 0.6071428656578064, "reward_std": 0.16281278431415558, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1177 }, { "clip_ratio/high_max": 0.0025467468294664286, "clip_ratio/high_mean": 0.0009412623439857271, "clip_ratio/low_mean": 0.0008732760998100275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018145384601666592, "epoch": 2.7512394284047827, "grad_norm": 0.24822822213172913, "learning_rate": 1e-06, "loss": -0.046, "step": 1178 }, { "clip_ratio/high_max": 0.002678291952179279, "clip_ratio/high_mean": 0.0009726418738864595, "clip_ratio/low_mean": 0.0009707488916319562, "clip_ratio/low_min": 2.565154863987118e-05, "clip_ratio/region_mean": 0.001943390758242458, "epoch": 2.753572470107903, "grad_norm": 0.2327621877193451, "learning_rate": 1e-06, "loss": -0.0461, "step": 1179 }, { "clip_ratio/high_max": 0.0026214629324385896, "clip_ratio/high_mean": 0.0008822716972645139, "clip_ratio/low_mean": 0.0013206969961174764, "clip_ratio/low_min": 2.565154863987118e-05, "clip_ratio/region_mean": 0.002202968746132683, "epoch": 2.7559055118110236, "grad_norm": 0.23097383975982666, "learning_rate": 1e-06, "loss": -0.0462, "step": 1180 }, { "clip_ratio/high_max": 0.0026383646836620755, "clip_ratio/high_mean": 0.000980668108240934, "clip_ratio/low_mean": 0.0007353683377004927, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017160364659503102, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 1247.09716796875, "completions/mean_terminated_length": 665.0631713867188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.7582385535141443, "grad_norm": 0.32525336742401123, "learning_rate": 1e-06, "loss": -0.0709, "num_tokens": 173821887.0, "reward": 0.5345982313156128, "reward_std": 0.16930988430976868, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1181 }, { "clip_ratio/high_max": 0.002790924299915787, "clip_ratio/high_mean": 0.0010920978220383404, "clip_ratio/low_mean": 0.0009414568048669025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020335546214482747, "epoch": 2.7605715952172645, "grad_norm": 0.25325608253479004, "learning_rate": 1e-06, "loss": -0.0711, "step": 1182 }, { "clip_ratio/high_max": 0.002858604333596304, "clip_ratio/high_mean": 0.0010866996581171406, "clip_ratio/low_mean": 0.0011052198096876964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021919194914516993, "epoch": 2.7629046369203847, "grad_norm": 0.22945712506771088, "learning_rate": 1e-06, "loss": -0.0713, "step": 1183 }, { "clip_ratio/high_max": 0.002792059247440193, "clip_ratio/high_mean": 0.0011309509209240787, "clip_ratio/low_mean": 0.0013075094866508152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002438460389385, "epoch": 2.7652376786235053, "grad_norm": 0.23172591626644135, "learning_rate": 1e-06, "loss": -0.0714, "step": 1184 }, { "clip_ratio/high_max": 0.003013772060512565, "clip_ratio/high_mean": 0.0011258381564402953, "clip_ratio/low_mean": 0.0008641459953651065, "clip_ratio/low_min": 1.304801662627142e-05, "clip_ratio/region_mean": 0.0019899841499864124, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3706.0, "completions/mean_length": 1096.640625, "completions/mean_terminated_length": 676.8829345703125, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.767570720326626, "grad_norm": 0.4517523944377899, "learning_rate": 1e-06, "loss": -0.0366, "num_tokens": 174442205.0, "reward": 0.5424107313156128, "reward_std": 0.18754372000694275, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763562679291, "step": 1185 }, { "clip_ratio/high_max": 0.003333640306664165, "clip_ratio/high_mean": 0.001336344128503697, "clip_ratio/low_mean": 0.0012563255713757826, "clip_ratio/low_min": 6.52400849503465e-05, "clip_ratio/region_mean": 0.002592669588921126, "epoch": 2.769903762029746, "grad_norm": 0.3139788508415222, "learning_rate": 1e-06, "loss": -0.037, "step": 1186 }, { "clip_ratio/high_max": 0.0034307411697227508, "clip_ratio/high_mean": 0.0012566710538521875, "clip_ratio/low_mean": 0.0014524489160976373, "clip_ratio/low_min": 5.219206650508568e-05, "clip_ratio/region_mean": 0.002709119929932058, "epoch": 2.772236803732867, "grad_norm": 0.26903483271598816, "learning_rate": 1e-06, "loss": -0.0371, "step": 1187 }, { "clip_ratio/high_max": 0.0035063183604506776, "clip_ratio/high_mean": 0.00135813991073519, "clip_ratio/low_mean": 0.0017541509369038977, "clip_ratio/low_min": 7.303107850020751e-05, "clip_ratio/region_mean": 0.0031122907894314267, "epoch": 2.774569845435987, "grad_norm": 0.2814794182777405, "learning_rate": 1e-06, "loss": -0.0373, "step": 1188 }, { "clip_ratio/high_max": 0.0020838035343331285, "clip_ratio/high_mean": 0.0008119172434817301, "clip_ratio/low_mean": 0.0005080953824290191, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001320012645010138, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 1269.9576416015625, "completions/mean_terminated_length": 678.8150634765625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.7769028871391077, "grad_norm": 0.2438543736934662, "learning_rate": 1e-06, "loss": -0.0851, "num_tokens": 175037479.0, "reward": 0.5178571939468384, "reward_std": 0.1632988154888153, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 1189 }, { "clip_ratio/high_max": 0.002365332911722362, "clip_ratio/high_mean": 0.0009482004315941595, "clip_ratio/low_mean": 0.000811347272247076, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017595476601854898, "epoch": 2.779235928842228, "grad_norm": 0.21394813060760498, "learning_rate": 1e-06, "loss": -0.0853, "step": 1190 }, { "clip_ratio/high_max": 0.0028085903213650454, "clip_ratio/high_mean": 0.001010134254102013, "clip_ratio/low_mean": 0.0008846170476317639, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018947512508020736, "epoch": 2.7815689705453486, "grad_norm": 0.20263056457042694, "learning_rate": 1e-06, "loss": -0.0854, "step": 1191 }, { "clip_ratio/high_max": 0.00252992239620653, "clip_ratio/high_mean": 0.0009914755883073667, "clip_ratio/low_mean": 0.0010134298354387283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020049054437549785, "epoch": 2.783902012248469, "grad_norm": 0.20704755187034607, "learning_rate": 1e-06, "loss": -0.0854, "step": 1192 }, { "clip_ratio/high_max": 0.002115512397722341, "clip_ratio/high_mean": 0.0009101857149289572, "clip_ratio/low_mean": 0.0007816509087206214, "clip_ratio/low_min": 1.2021542715956457e-05, "clip_ratio/region_mean": 0.0016918366600293666, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3332.0, "completions/mean_length": 1241.036865234375, "completions/mean_terminated_length": 685.2706298828125, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 2.7862350539515894, "grad_norm": 0.339778333902359, "learning_rate": 1e-06, "loss": -0.0496, "num_tokens": 175639264.0, "reward": 0.5212053656578064, "reward_std": 0.19377093017101288, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 1193 }, { "clip_ratio/high_max": 0.002788091187539976, "clip_ratio/high_mean": 0.0011203964841115521, "clip_ratio/low_mean": 0.0010449898163642501, "clip_ratio/low_min": 1.3845812645740807e-05, "clip_ratio/region_mean": 0.0021653863150277175, "epoch": 2.7885680956547096, "grad_norm": 0.28704142570495605, "learning_rate": 1e-06, "loss": -0.0499, "step": 1194 }, { "clip_ratio/high_max": 0.003262251972046215, "clip_ratio/high_mean": 0.0012426850935298717, "clip_ratio/low_mean": 0.0013735083193751052, "clip_ratio/low_min": 2.6974536012858152e-05, "clip_ratio/region_mean": 0.002616193421999924, "epoch": 2.7909011373578303, "grad_norm": 0.24440589547157288, "learning_rate": 1e-06, "loss": -0.0501, "step": 1195 }, { "clip_ratio/high_max": 0.002734982001129538, "clip_ratio/high_mean": 0.0011236375848966418, "clip_ratio/low_mean": 0.0015607085042574909, "clip_ratio/low_min": 4.940060534863733e-05, "clip_ratio/region_mean": 0.002684346094611101, "epoch": 2.793234179060951, "grad_norm": 0.2630174458026886, "learning_rate": 1e-06, "loss": -0.0502, "step": 1196 }, { "clip_ratio/high_max": 0.003300103453511838, "clip_ratio/high_mean": 0.001191360746815917, "clip_ratio/low_mean": 0.0007367967900790973, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019281575951026753, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3343.0, "completions/mean_length": 1237.2467041015625, "completions/mean_terminated_length": 703.3602905273438, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 2.795567220764071, "grad_norm": 0.30969828367233276, "learning_rate": 1e-06, "loss": -0.0785, "num_tokens": 176254261.0, "reward": 0.5792410969734192, "reward_std": 0.16995178163051605, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1197 }, { "clip_ratio/high_max": 0.003714725600730162, "clip_ratio/high_mean": 0.0013485967392625753, "clip_ratio/low_mean": 0.0009606522280591889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002309248950041365, "epoch": 2.7979002624671914, "grad_norm": 0.29674312472343445, "learning_rate": 1e-06, "loss": -0.0786, "step": 1198 }, { "clip_ratio/high_max": 0.0038086366112111136, "clip_ratio/high_mean": 0.0014416351423278684, "clip_ratio/low_mean": 0.0009979936794479727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00243962890090188, "epoch": 2.800233304170312, "grad_norm": 0.3184277415275574, "learning_rate": 1e-06, "loss": -0.0788, "step": 1199 }, { "clip_ratio/high_max": 0.003577950566977961, "clip_ratio/high_mean": 0.0012790588716597995, "clip_ratio/low_mean": 0.001276157284337387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025552161896484904, "epoch": 2.8025663458734327, "grad_norm": 0.22036480903625488, "learning_rate": 1e-06, "loss": -0.079, "step": 1200 }, { "clip_ratio/high_max": 0.0021206787823757622, "clip_ratio/high_mean": 0.0008117430406855419, "clip_ratio/low_mean": 0.0005327217286321684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013444648102449719, "completions/clipped_ratio": 0.1529017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3625.0, "completions/mean_length": 1157.3828125, "completions/mean_terminated_length": 626.9605102539062, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 2.804899387576553, "grad_norm": 0.26239198446273804, "learning_rate": 1e-06, "loss": -0.0505, "num_tokens": 176810772.0, "reward": 0.5580357313156128, "reward_std": 0.14676883816719055, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 1201 }, { "clip_ratio/high_max": 0.0025539233974996023, "clip_ratio/high_mean": 0.0009800572079257108, "clip_ratio/low_mean": 0.0007396926212095423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017197498382302001, "epoch": 2.8072324292796735, "grad_norm": 0.19713591039180756, "learning_rate": 1e-06, "loss": -0.0507, "step": 1202 }, { "clip_ratio/high_max": 0.002599708161142189, "clip_ratio/high_mean": 0.0009262524527002824, "clip_ratio/low_mean": 0.0008012518883333541, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017275043101108167, "epoch": 2.8095654709827937, "grad_norm": 0.24003306031227112, "learning_rate": 1e-06, "loss": -0.0507, "step": 1203 }, { "clip_ratio/high_max": 0.002897015765483957, "clip_ratio/high_mean": 0.0010553792744758539, "clip_ratio/low_mean": 0.000998868641545414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002054247903288342, "epoch": 2.8118985126859144, "grad_norm": 0.20766811072826385, "learning_rate": 1e-06, "loss": -0.0508, "step": 1204 }, { "clip_ratio/high_max": 0.0023108789464458823, "clip_ratio/high_mean": 0.0009054496049429872, "clip_ratio/low_mean": 0.0006721101754010306, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015775597821630072, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 1284.1239013671875, "completions/mean_terminated_length": 644.7109985351562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.8142315543890346, "grad_norm": 0.33877506852149963, "learning_rate": 1e-06, "loss": -0.0541, "num_tokens": 177365099.0, "reward": 0.5245535969734192, "reward_std": 0.17570818960666656, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 1205 }, { "clip_ratio/high_max": 0.002984140723128803, "clip_ratio/high_mean": 0.0012275164845050313, "clip_ratio/low_mean": 0.0009287758030041005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021562922847806476, "epoch": 2.8165645960921553, "grad_norm": 0.3308020532131195, "learning_rate": 1e-06, "loss": -0.0544, "step": 1206 }, { "clip_ratio/high_max": 0.003067373127123574, "clip_ratio/high_mean": 0.0012576047138281865, "clip_ratio/low_mean": 0.0011606596617639298, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024182643537642434, "epoch": 2.8188976377952755, "grad_norm": 0.2738145589828491, "learning_rate": 1e-06, "loss": -0.0547, "step": 1207 }, { "clip_ratio/high_max": 0.0028996927321713883, "clip_ratio/high_mean": 0.0010932534914900316, "clip_ratio/low_mean": 0.0014001720101077808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024934255197877064, "epoch": 2.821230679498396, "grad_norm": 0.35904988646507263, "learning_rate": 1e-06, "loss": -0.0547, "step": 1208 }, { "clip_ratio/high_max": 0.00207980446430156, "clip_ratio/high_mean": 0.0007250223825394642, "clip_ratio/low_mean": 0.0006736360783179407, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013986584708618466, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 1330.8460693359375, "completions/mean_terminated_length": 752.43994140625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.8235637212015163, "grad_norm": 0.2817864418029785, "learning_rate": 1e-06, "loss": -0.0639, "num_tokens": 178012521.0, "reward": 0.53125, "reward_std": 0.16198793053627014, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1209 }, { "clip_ratio/high_max": 0.0026954747299896553, "clip_ratio/high_mean": 0.0008574896291975165, "clip_ratio/low_mean": 0.0009417580295121297, "clip_ratio/low_min": 1.9860184693243355e-05, "clip_ratio/region_mean": 0.001799247671442572, "epoch": 2.825896762904637, "grad_norm": 0.2551283538341522, "learning_rate": 1e-06, "loss": -0.0641, "step": 1210 }, { "clip_ratio/high_max": 0.0026105109573109075, "clip_ratio/high_mean": 0.0008668896643939661, "clip_ratio/low_mean": 0.0011566333250812022, "clip_ratio/low_min": 2.131287328666076e-05, "clip_ratio/region_mean": 0.002023522974923253, "epoch": 2.8282298046077576, "grad_norm": 0.22527474164962769, "learning_rate": 1e-06, "loss": -0.0642, "step": 1211 }, { "clip_ratio/high_max": 0.002564192283898592, "clip_ratio/high_mean": 0.0008459131749987137, "clip_ratio/low_mean": 0.0012112642543797847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002057177436654456, "epoch": 2.830562846310878, "grad_norm": 0.22328247129917145, "learning_rate": 1e-06, "loss": -0.0643, "step": 1212 }, { "clip_ratio/high_max": 0.002531326266762335, "clip_ratio/high_mean": 0.0010001528316934127, "clip_ratio/low_mean": 0.0007164640178416448, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017166168399853632, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2916.0, "completions/mean_length": 1123.37060546875, "completions/mean_terminated_length": 609.77490234375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 2.832895888013998, "grad_norm": 0.3418889343738556, "learning_rate": 1e-06, "loss": -0.0552, "num_tokens": 178569413.0, "reward": 0.6004464626312256, "reward_std": 0.17634011805057526, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1213 }, { "clip_ratio/high_max": 0.002907997964939568, "clip_ratio/high_mean": 0.0012003184456261806, "clip_ratio/low_mean": 0.000967797907833301, "clip_ratio/low_min": 1.5166221601248253e-05, "clip_ratio/region_mean": 0.0021681163270841353, "epoch": 2.8352289297171187, "grad_norm": 0.2897513210773468, "learning_rate": 1e-06, "loss": -0.0554, "step": 1214 }, { "clip_ratio/high_max": 0.002957124510430731, "clip_ratio/high_mean": 0.0010831455474544782, "clip_ratio/low_mean": 0.0012507559713412775, "clip_ratio/low_min": 1.1959433322772384e-05, "clip_ratio/region_mean": 0.002333901538804639, "epoch": 2.8375619714202394, "grad_norm": 0.2763117849826813, "learning_rate": 1e-06, "loss": -0.0556, "step": 1215 }, { "clip_ratio/high_max": 0.0029035662228125148, "clip_ratio/high_mean": 0.001143075820436934, "clip_ratio/low_mean": 0.001538982873171335, "clip_ratio/low_min": 3.0332443202496506e-05, "clip_ratio/region_mean": 0.0026820587008842267, "epoch": 2.8398950131233596, "grad_norm": 0.25847601890563965, "learning_rate": 1e-06, "loss": -0.0557, "step": 1216 }, { "clip_ratio/high_max": 0.002246396463306155, "clip_ratio/high_mean": 0.0008512069762218744, "clip_ratio/low_mean": 0.0006472689628935768, "clip_ratio/low_min": 1.4785900020797271e-05, "clip_ratio/region_mean": 0.0014984759218350518, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 1240.4609375, "completions/mean_terminated_length": 657.0712280273438, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 2.8422280548264798, "grad_norm": 0.3724900484085083, "learning_rate": 1e-06, "loss": -0.0451, "num_tokens": 179157538.0, "reward": 0.559151828289032, "reward_std": 0.15642336010932922, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1217 }, { "clip_ratio/high_max": 0.002594152738311095, "clip_ratio/high_mean": 0.0010427189536130754, "clip_ratio/low_mean": 0.0009653071083448594, "clip_ratio/low_min": 1.4785900020797271e-05, "clip_ratio/region_mean": 0.0020080260692338925, "epoch": 2.8445610965296004, "grad_norm": 0.27912214398384094, "learning_rate": 1e-06, "loss": -0.0455, "step": 1218 }, { "clip_ratio/high_max": 0.002910590839746874, "clip_ratio/high_mean": 0.001090924324671505, "clip_ratio/low_mean": 0.0010437631863169372, "clip_ratio/low_min": 2.9571800041594543e-05, "clip_ratio/region_mean": 0.0021346874891605694, "epoch": 2.846894138232721, "grad_norm": 0.25943613052368164, "learning_rate": 1e-06, "loss": -0.0456, "step": 1219 }, { "clip_ratio/high_max": 0.00258619509259006, "clip_ratio/high_mean": 0.0009054034726432292, "clip_ratio/low_mean": 0.0012875327192887198, "clip_ratio/low_min": 4.4357700971886516e-05, "clip_ratio/region_mean": 0.0021929361901129596, "epoch": 2.8492271799358413, "grad_norm": 0.23242810368537903, "learning_rate": 1e-06, "loss": -0.0457, "step": 1220 }, { "clip_ratio/high_max": 0.002463534357957542, "clip_ratio/high_mean": 0.0009862915576377418, "clip_ratio/low_mean": 0.000740970635888516, "clip_ratio/low_min": 3.55821248376742e-05, "clip_ratio/region_mean": 0.0017272621698793955, "completions/clipped_ratio": 0.171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3266.0, "completions/mean_length": 1230.34716796875, "completions/mean_terminated_length": 635.5889282226562, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.851560221638962, "grad_norm": 0.33978813886642456, "learning_rate": 1e-06, "loss": -0.0675, "num_tokens": 179712281.0, "reward": 0.5446428656578064, "reward_std": 0.1726941168308258, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 1221 }, { "clip_ratio/high_max": 0.0031510016779066063, "clip_ratio/high_mean": 0.0011794015936175128, "clip_ratio/low_mean": 0.0008715644653420895, "clip_ratio/low_min": 4.572310263029067e-05, "clip_ratio/region_mean": 0.002050966075330507, "epoch": 2.853893263342082, "grad_norm": 0.3055119216442108, "learning_rate": 1e-06, "loss": -0.0677, "step": 1222 }, { "clip_ratio/high_max": 0.0033840225733001716, "clip_ratio/high_mean": 0.0012274939244889538, "clip_ratio/low_mean": 0.0011448321802163264, "clip_ratio/low_min": 1.3261192179925274e-05, "clip_ratio/region_mean": 0.0023723261037957855, "epoch": 2.856226305045203, "grad_norm": 0.2623078227043152, "learning_rate": 1e-06, "loss": -0.068, "step": 1223 }, { "clip_ratio/high_max": 0.0027565543532546144, "clip_ratio/high_mean": 0.0010786218917928636, "clip_ratio/low_mean": 0.0014120109372015577, "clip_ratio/low_min": 9.161041270999704e-05, "clip_ratio/region_mean": 0.0024906328108045273, "epoch": 2.858559346748323, "grad_norm": 0.2556232213973999, "learning_rate": 1e-06, "loss": -0.068, "step": 1224 }, { "clip_ratio/high_max": 0.002132123066985514, "clip_ratio/high_mean": 0.000865826683366322, "clip_ratio/low_mean": 0.0004899367630741835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013557634192693513, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3233.0, "completions/mean_length": 1271.3717041015625, "completions/mean_terminated_length": 633.8016967773438, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.8608923884514437, "grad_norm": 0.3052181601524353, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 180259150.0, "reward": 0.5345982313156128, "reward_std": 0.1515420526266098, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1225 }, { "clip_ratio/high_max": 0.00272468279581517, "clip_ratio/high_mean": 0.0010511810796742793, "clip_ratio/low_mean": 0.0006787887932659942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017299698738497682, "epoch": 2.863225430154564, "grad_norm": 0.3244656026363373, "learning_rate": 1e-06, "loss": -0.0545, "step": 1226 }, { "clip_ratio/high_max": 0.0026379339178674854, "clip_ratio/high_mean": 0.001092517632059753, "clip_ratio/low_mean": 0.0008542970224425517, "clip_ratio/low_min": 2.0647505152737722e-05, "clip_ratio/region_mean": 0.0019468145837890916, "epoch": 2.8655584718576845, "grad_norm": 0.2317638099193573, "learning_rate": 1e-06, "loss": -0.0546, "step": 1227 }, { "clip_ratio/high_max": 0.002808631288644392, "clip_ratio/high_mean": 0.0010549850030656671, "clip_ratio/low_mean": 0.0010281999293511035, "clip_ratio/low_min": 1.575497844896745e-05, "clip_ratio/region_mean": 0.002083184925140813, "epoch": 2.8678915135608047, "grad_norm": 0.2297726422548294, "learning_rate": 1e-06, "loss": -0.0546, "step": 1228 }, { "clip_ratio/high_max": 0.0020344149161246605, "clip_ratio/high_mean": 0.0008280365382233867, "clip_ratio/low_mean": 0.0006267763910727808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001454812940210104, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 1098.884033203125, "completions/mean_terminated_length": 635.412353515625, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.8702245552639254, "grad_norm": 0.37846052646636963, "learning_rate": 1e-06, "loss": -0.0355, "num_tokens": 180839038.0, "reward": 0.6049107313156128, "reward_std": 0.1555236279964447, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1229 }, { "clip_ratio/high_max": 0.002511028382286895, "clip_ratio/high_mean": 0.0010764880844362779, "clip_ratio/low_mean": 0.0007981017679412616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018745899287750944, "epoch": 2.872557596967046, "grad_norm": 0.2716333568096161, "learning_rate": 1e-06, "loss": -0.0358, "step": 1230 }, { "clip_ratio/high_max": 0.0027851527556777, "clip_ratio/high_mean": 0.0010962301967083476, "clip_ratio/low_mean": 0.0010780406737467274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00217427085590316, "epoch": 2.8748906386701663, "grad_norm": 0.24592313170433044, "learning_rate": 1e-06, "loss": -0.0361, "step": 1231 }, { "clip_ratio/high_max": 0.0025209656268998515, "clip_ratio/high_mean": 0.000988474640507775, "clip_ratio/low_mean": 0.0013377151772147045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023261897658812813, "epoch": 2.8772236803732865, "grad_norm": 0.2778870463371277, "learning_rate": 1e-06, "loss": -0.0361, "step": 1232 }, { "clip_ratio/high_max": 0.0020293524794396944, "clip_ratio/high_mean": 0.0008347573566425126, "clip_ratio/low_mean": 0.0005649715913023101, "clip_ratio/low_min": 1.323311425949214e-05, "clip_ratio/region_mean": 0.0013997289497638121, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3990.0, "completions/mean_length": 1249.10498046875, "completions/mean_terminated_length": 662.8667602539062, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 2.879556722076407, "grad_norm": 0.2888152599334717, "learning_rate": 1e-06, "loss": -0.0342, "num_tokens": 181418684.0, "reward": 0.520089328289032, "reward_std": 0.1504133939743042, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1233 }, { "clip_ratio/high_max": 0.0025165256338368636, "clip_ratio/high_mean": 0.0009329646563855931, "clip_ratio/low_mean": 0.0008046613766055088, "clip_ratio/low_min": 1.8296252164873295e-05, "clip_ratio/region_mean": 0.0017376260075252503, "epoch": 2.8818897637795278, "grad_norm": 0.22874966263771057, "learning_rate": 1e-06, "loss": -0.0344, "step": 1234 }, { "clip_ratio/high_max": 0.00254748542465677, "clip_ratio/high_mean": 0.0010051646777355927, "clip_ratio/low_mean": 0.0009650562169554178, "clip_ratio/low_min": 1.3403388038568664e-05, "clip_ratio/region_mean": 0.001970220921066357, "epoch": 2.884222805482648, "grad_norm": 0.21959078311920166, "learning_rate": 1e-06, "loss": -0.0345, "step": 1235 }, { "clip_ratio/high_max": 0.0026146117743337527, "clip_ratio/high_mean": 0.000941455909924116, "clip_ratio/low_mean": 0.0010380630519648548, "clip_ratio/low_min": 5.293245703796856e-05, "clip_ratio/region_mean": 0.00197951905647642, "epoch": 2.886555847185768, "grad_norm": 0.28595098853111267, "learning_rate": 1e-06, "loss": -0.0346, "step": 1236 }, { "clip_ratio/high_max": 0.002767169673461467, "clip_ratio/high_mean": 0.0011425521242927061, "clip_ratio/low_mean": 0.0009676702266006032, "clip_ratio/low_min": 4.705656374426326e-05, "clip_ratio/region_mean": 0.0021102223327034153, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 1280.4320068359375, "completions/mean_terminated_length": 718.8259887695312, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 2.888888888888889, "grad_norm": 0.3602418303489685, "learning_rate": 1e-06, "loss": -0.0748, "num_tokens": 182040983.0, "reward": 0.4988839626312256, "reward_std": 0.20282992720603943, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1237 }, { "clip_ratio/high_max": 0.003788474656175822, "clip_ratio/high_mean": 0.0013509797827282455, "clip_ratio/low_mean": 0.0012202237157907803, "clip_ratio/low_min": 2.8419009140634444e-05, "clip_ratio/region_mean": 0.0025712034985190257, "epoch": 2.8912219305920095, "grad_norm": 0.2973235547542572, "learning_rate": 1e-06, "loss": -0.0751, "step": 1238 }, { "clip_ratio/high_max": 0.0034605427645146847, "clip_ratio/high_mean": 0.0013756530061073136, "clip_ratio/low_mean": 0.0014561256757588126, "clip_ratio/low_min": 9.341506392956944e-05, "clip_ratio/region_mean": 0.002831778627296444, "epoch": 2.8935549722951297, "grad_norm": 0.2692912220954895, "learning_rate": 1e-06, "loss": -0.0753, "step": 1239 }, { "clip_ratio/high_max": 0.0036129713262198493, "clip_ratio/high_mean": 0.0013319130812305957, "clip_ratio/low_mean": 0.001683001533820061, "clip_ratio/low_min": 7.718307097093202e-05, "clip_ratio/region_mean": 0.0030149146332405508, "epoch": 2.8958880139982504, "grad_norm": 0.27325916290283203, "learning_rate": 1e-06, "loss": -0.0754, "step": 1240 }, { "clip_ratio/high_max": 0.0032859176135389134, "clip_ratio/high_mean": 0.0013003923741052859, "clip_ratio/low_mean": 0.0005826023161716876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018829947512131184, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 1168.388427734375, "completions/mean_terminated_length": 617.0344848632812, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 2.8982210557013706, "grad_norm": 0.4320470690727234, "learning_rate": 1e-06, "loss": -0.0908, "num_tokens": 182605411.0, "reward": 0.6127232313156128, "reward_std": 0.17979852855205536, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 1241 }, { "clip_ratio/high_max": 0.003133914178761188, "clip_ratio/high_mean": 0.0012624690498341806, "clip_ratio/low_mean": 0.0008868542624895781, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021493233216460794, "epoch": 2.900554097404491, "grad_norm": 0.33980104327201843, "learning_rate": 1e-06, "loss": -0.091, "step": 1242 }, { "clip_ratio/high_max": 0.003558458687621169, "clip_ratio/high_mean": 0.0014487796688626986, "clip_ratio/low_mean": 0.0011010279954462021, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002549807686591521, "epoch": 2.9028871391076114, "grad_norm": 0.2559306025505066, "learning_rate": 1e-06, "loss": -0.0913, "step": 1243 }, { "clip_ratio/high_max": 0.0033493489027023315, "clip_ratio/high_mean": 0.0013537378908949904, "clip_ratio/low_mean": 0.0012544961227831664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002608234019135125, "epoch": 2.905220180810732, "grad_norm": 0.26972684264183044, "learning_rate": 1e-06, "loss": -0.0913, "step": 1244 }, { "clip_ratio/high_max": 0.0020876192866126075, "clip_ratio/high_mean": 0.0006878449212308624, "clip_ratio/low_mean": 0.0005604099387710448, "clip_ratio/low_min": 1.269551103177946e-05, "clip_ratio/region_mean": 0.0012482548845582642, "completions/clipped_ratio": 0.1808035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3991.0, "completions/mean_length": 1270.688720703125, "completions/mean_terminated_length": 647.1185302734375, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 2.9075532225138523, "grad_norm": 0.23925110697746277, "learning_rate": 1e-06, "loss": -0.0772, "num_tokens": 183170740.0, "reward": 0.5680803656578064, "reward_std": 0.14078985154628754, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1245 }, { "clip_ratio/high_max": 0.002451334377838066, "clip_ratio/high_mean": 0.0008019774531931034, "clip_ratio/low_mean": 0.0007614677442688844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001563445184729062, "epoch": 2.909886264216973, "grad_norm": 0.24351325631141663, "learning_rate": 1e-06, "loss": -0.0773, "step": 1246 }, { "clip_ratio/high_max": 0.002328195438167313, "clip_ratio/high_mean": 0.0008338033185282256, "clip_ratio/low_mean": 0.0008368225990125211, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001670625941187609, "epoch": 2.912219305920093, "grad_norm": 0.23359844088554382, "learning_rate": 1e-06, "loss": -0.0774, "step": 1247 }, { "clip_ratio/high_max": 0.0023826419565011747, "clip_ratio/high_mean": 0.0008263971176347695, "clip_ratio/low_mean": 0.0010366100559622282, "clip_ratio/low_min": 2.891510484914761e-05, "clip_ratio/region_mean": 0.0018630072263476904, "epoch": 2.914552347623214, "grad_norm": 0.1954878568649292, "learning_rate": 1e-06, "loss": -0.0775, "step": 1248 }, { "clip_ratio/high_max": 0.002909617527620867, "clip_ratio/high_mean": 0.001141853274020832, "clip_ratio/low_mean": 0.0006739021519024391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018157554331992287, "completions/clipped_ratio": 0.1997767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3484.0, "completions/mean_length": 1377.587158203125, "completions/mean_terminated_length": 698.931640625, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 2.9168853893263345, "grad_norm": 0.3355540931224823, "learning_rate": 1e-06, "loss": -0.1229, "num_tokens": 183765530.0, "reward": 0.5133928656578064, "reward_std": 0.2020464688539505, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1249 }, { "clip_ratio/high_max": 0.003443351000896655, "clip_ratio/high_mean": 0.0013787734824290965, "clip_ratio/low_mean": 0.0009193578698614147, "clip_ratio/low_min": 1.67358411999885e-05, "clip_ratio/region_mean": 0.002298131315910723, "epoch": 2.9192184310294547, "grad_norm": 0.3063414394855499, "learning_rate": 1e-06, "loss": -0.1232, "step": 1250 }, { "clip_ratio/high_max": 0.003658572015410755, "clip_ratio/high_mean": 0.0014169354108162224, "clip_ratio/low_mean": 0.0011746121272153687, "clip_ratio/low_min": 8.36792059999425e-06, "clip_ratio/region_mean": 0.0025915475343936123, "epoch": 2.921551472732575, "grad_norm": 0.24443690478801727, "learning_rate": 1e-06, "loss": -0.1235, "step": 1251 }, { "clip_ratio/high_max": 0.0033247855681111105, "clip_ratio/high_mean": 0.0012643429545278195, "clip_ratio/low_mean": 0.0012776335424860008, "clip_ratio/low_min": 1.67358411999885e-05, "clip_ratio/region_mean": 0.002541976544307545, "epoch": 2.9238845144356955, "grad_norm": 0.2677989602088928, "learning_rate": 1e-06, "loss": -0.1235, "step": 1252 }, { "clip_ratio/high_max": 0.0031514803486061282, "clip_ratio/high_mean": 0.0010818075570568908, "clip_ratio/low_mean": 0.0005988120160509425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016806195926619694, "completions/clipped_ratio": 0.2265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3857.0, "completions/mean_length": 1470.782470703125, "completions/mean_terminated_length": 701.7792358398438, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.926217556138816, "grad_norm": 0.36490005254745483, "learning_rate": 1e-06, "loss": -0.0912, "num_tokens": 184341335.0, "reward": 0.5033482313156128, "reward_std": 0.17340727150440216, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1253 }, { "clip_ratio/high_max": 0.003511617695039604, "clip_ratio/high_mean": 0.0012043408587487647, "clip_ratio/low_mean": 0.0009400233684573323, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00214436421811115, "epoch": 2.9285505978419364, "grad_norm": 0.3625325560569763, "learning_rate": 1e-06, "loss": -0.0915, "step": 1254 }, { "clip_ratio/high_max": 0.0036964921237085946, "clip_ratio/high_mean": 0.0013299772872414906, "clip_ratio/low_mean": 0.0011411833074816968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024711605874472298, "epoch": 2.9308836395450566, "grad_norm": 0.2712169885635376, "learning_rate": 1e-06, "loss": -0.0917, "step": 1255 }, { "clip_ratio/high_max": 0.0035515071940608323, "clip_ratio/high_mean": 0.0011886032934853574, "clip_ratio/low_mean": 0.001462831896787975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026514351993682794, "epoch": 2.9332166812481772, "grad_norm": 0.28189143538475037, "learning_rate": 1e-06, "loss": -0.0918, "step": 1256 }, { "clip_ratio/high_max": 0.0029168042092351243, "clip_ratio/high_mean": 0.0011313008290017024, "clip_ratio/low_mean": 0.0006421525376936188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017734533903421834, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3267.0, "completions/mean_length": 1248.1785888671875, "completions/mean_terminated_length": 652.4804077148438, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.935549722951298, "grad_norm": 0.32276833057403564, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 184919535.0, "reward": 0.5446428656578064, "reward_std": 0.17130404710769653, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 1257 }, { "clip_ratio/high_max": 0.003362276627740357, "clip_ratio/high_mean": 0.0013551082702178974, "clip_ratio/low_mean": 0.0008055710422922857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021606793234241195, "epoch": 2.937882764654418, "grad_norm": 0.2880367338657379, "learning_rate": 1e-06, "loss": -0.0692, "step": 1258 }, { "clip_ratio/high_max": 0.0032669836000422947, "clip_ratio/high_mean": 0.0013611893409688491, "clip_ratio/low_mean": 0.0010897846113948617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024509740105713718, "epoch": 2.9402158063575388, "grad_norm": 0.2908535301685333, "learning_rate": 1e-06, "loss": -0.0694, "step": 1259 }, { "clip_ratio/high_max": 0.003219017293304205, "clip_ratio/high_mean": 0.0012895310610474553, "clip_ratio/low_mean": 0.0012497504794737324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002539281500503421, "epoch": 2.942548848060659, "grad_norm": 0.25888124108314514, "learning_rate": 1e-06, "loss": -0.0695, "step": 1260 }, { "clip_ratio/high_max": 0.0031690335308667272, "clip_ratio/high_mean": 0.0010736659132817294, "clip_ratio/low_mean": 0.000851191491165082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019248574171797372, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 1107.841552734375, "completions/mean_terminated_length": 654.6246948242188, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 2.9448818897637796, "grad_norm": 0.41687285900115967, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 185523905.0, "reward": 0.5714285969734192, "reward_std": 0.17070811986923218, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1261 }, { "clip_ratio/high_max": 0.003776175442908425, "clip_ratio/high_mean": 0.0013129156450304436, "clip_ratio/low_mean": 0.0011358087704138597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024487245027557947, "epoch": 2.9472149314669, "grad_norm": 0.31890761852264404, "learning_rate": 1e-06, "loss": -0.0386, "step": 1262 }, { "clip_ratio/high_max": 0.0034182021554443054, "clip_ratio/high_mean": 0.0012003357005596627, "clip_ratio/low_mean": 0.0012306724784139078, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024310082371812314, "epoch": 2.9495479731700205, "grad_norm": 0.31810462474823, "learning_rate": 1e-06, "loss": -0.0388, "step": 1263 }, { "clip_ratio/high_max": 0.0037536010349867865, "clip_ratio/high_mean": 0.0012760615500155836, "clip_ratio/low_mean": 0.0015386828272312414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0028147443663328886, "epoch": 2.9518810148731407, "grad_norm": 0.330525279045105, "learning_rate": 1e-06, "loss": -0.0389, "step": 1264 }, { "clip_ratio/high_max": 0.003201582541805692, "clip_ratio/high_mean": 0.0011561989849724341, "clip_ratio/low_mean": 0.0007509991464758059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019071980932494625, "completions/clipped_ratio": 0.2131696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3647.0, "completions/mean_length": 1442.813720703125, "completions/mean_terminated_length": 724.007080078125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 2.9542140565762613, "grad_norm": 0.3820577561855316, "learning_rate": 1e-06, "loss": -0.1146, "num_tokens": 186121642.0, "reward": 0.520089328289032, "reward_std": 0.19857734441757202, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1265 }, { "clip_ratio/high_max": 0.0037811147049069405, "clip_ratio/high_mean": 0.001389671135257231, "clip_ratio/low_mean": 0.0010202501507592387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002409921267826576, "epoch": 2.9565470982793816, "grad_norm": 0.27282702922821045, "learning_rate": 1e-06, "loss": -0.1149, "step": 1266 }, { "clip_ratio/high_max": 0.0034939453471451998, "clip_ratio/high_mean": 0.0013106262995279394, "clip_ratio/low_mean": 0.001251272336958209, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002561898661952, "epoch": 2.958880139982502, "grad_norm": 0.27828308939933777, "learning_rate": 1e-06, "loss": -0.1151, "step": 1267 }, { "clip_ratio/high_max": 0.0037431169039336964, "clip_ratio/high_mean": 0.0013020098085689824, "clip_ratio/low_mean": 0.001392197300447151, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0026942070908262394, "epoch": 2.961213181685623, "grad_norm": 0.3036958575248718, "learning_rate": 1e-06, "loss": -0.1152, "step": 1268 }, { "clip_ratio/high_max": 0.0024892619549063966, "clip_ratio/high_mean": 0.0008576363125030184, "clip_ratio/low_mean": 0.0006019526936142938, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001459589009755291, "completions/clipped_ratio": 0.1674107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2883.0, "completions/mean_length": 1226.3046875, "completions/mean_terminated_length": 649.2882080078125, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 2.963546223388743, "grad_norm": 0.30294013023376465, "learning_rate": 1e-06, "loss": -0.088, "num_tokens": 186695163.0, "reward": 0.5636160969734192, "reward_std": 0.1510867178440094, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 1269 }, { "clip_ratio/high_max": 0.0030007847453816794, "clip_ratio/high_mean": 0.0010451485723024234, "clip_ratio/low_mean": 0.0007863998307584552, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018315484121558256, "epoch": 2.9658792650918633, "grad_norm": 0.2306789755821228, "learning_rate": 1e-06, "loss": -0.0882, "step": 1270 }, { "clip_ratio/high_max": 0.002864803740521893, "clip_ratio/high_mean": 0.0010004250652855262, "clip_ratio/low_mean": 0.0008507482689310564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018511733578634448, "epoch": 2.968212306794984, "grad_norm": 0.23171015083789825, "learning_rate": 1e-06, "loss": -0.0883, "step": 1271 }, { "clip_ratio/high_max": 0.0022417276850319467, "clip_ratio/high_mean": 0.0008938817300077062, "clip_ratio/low_mean": 0.0010590481469989754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019529298806446604, "epoch": 2.9705453484981046, "grad_norm": 0.21633273363113403, "learning_rate": 1e-06, "loss": -0.0883, "step": 1272 }, { "clip_ratio/high_max": 0.002702313297049841, "clip_ratio/high_mean": 0.000888308519279235, "clip_ratio/low_mean": 0.0006352464279189007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015235549326462205, "completions/clipped_ratio": 0.203125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 1356.8382568359375, "completions/mean_terminated_length": 658.6204833984375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.972878390201225, "grad_norm": 0.3829348683357239, "learning_rate": 1e-06, "loss": -0.0772, "num_tokens": 187255202.0, "reward": 0.5223214626312256, "reward_std": 0.15774601697921753, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 1273 }, { "clip_ratio/high_max": 0.003664480900624767, "clip_ratio/high_mean": 0.001145835303759668, "clip_ratio/low_mean": 0.0008248937992902938, "clip_ratio/low_min": 1.3519359526981134e-05, "clip_ratio/region_mean": 0.001970729099411983, "epoch": 2.9752114319043454, "grad_norm": 0.2908918261528015, "learning_rate": 1e-06, "loss": -0.0773, "step": 1274 }, { "clip_ratio/high_max": 0.0035258611969766207, "clip_ratio/high_mean": 0.0011402282234485028, "clip_ratio/low_mean": 0.0010692770429159282, "clip_ratio/low_min": 1.970987068489194e-05, "clip_ratio/region_mean": 0.0022095053136581555, "epoch": 2.9775444736074657, "grad_norm": 0.2749641239643097, "learning_rate": 1e-06, "loss": -0.0776, "step": 1275 }, { "clip_ratio/high_max": 0.0031806512270122766, "clip_ratio/high_mean": 0.001110293977035326, "clip_ratio/low_mean": 0.0012063731919624843, "clip_ratio/low_min": 1.970987068489194e-05, "clip_ratio/region_mean": 0.0023166672108345665, "epoch": 2.9798775153105863, "grad_norm": 0.2742725610733032, "learning_rate": 1e-06, "loss": -0.0776, "step": 1276 }, { "clip_ratio/high_max": 0.0028186667332192883, "clip_ratio/high_mean": 0.0011238940751354676, "clip_ratio/low_mean": 0.0006910904971846321, "clip_ratio/low_min": 3.5132095945300534e-05, "clip_ratio/region_mean": 0.0018149845491279848, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4083.0, "completions/mean_length": 1191.9520263671875, "completions/mean_terminated_length": 681.2664184570312, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 2.9822105570137065, "grad_norm": 0.3575637936592102, "learning_rate": 1e-06, "loss": -0.0632, "num_tokens": 187871031.0, "reward": 0.6194196939468384, "reward_std": 0.17720773816108704, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1277 }, { "clip_ratio/high_max": 0.003162377870467026, "clip_ratio/high_mean": 0.0012078278268745635, "clip_ratio/low_mean": 0.0010228057308268035, "clip_ratio/low_min": 3.4270047763129696e-05, "clip_ratio/region_mean": 0.002230633508588653, "epoch": 2.984543598716827, "grad_norm": 0.2947334051132202, "learning_rate": 1e-06, "loss": -0.0634, "step": 1278 }, { "clip_ratio/high_max": 0.003265526691393461, "clip_ratio/high_mean": 0.0011708471392921638, "clip_ratio/low_mean": 0.0011104116874776082, "clip_ratio/low_min": 1.7566047972650267e-05, "clip_ratio/region_mean": 0.0022812587922089733, "epoch": 2.9868766404199474, "grad_norm": 0.24825675785541534, "learning_rate": 1e-06, "loss": -0.0636, "step": 1279 }, { "clip_ratio/high_max": 0.0033663997164694592, "clip_ratio/high_mean": 0.0013209114513301756, "clip_ratio/low_mean": 0.0014029893063707277, "clip_ratio/low_min": 5.3065035899635404e-05, "clip_ratio/region_mean": 0.002723900804994628, "epoch": 2.989209682123068, "grad_norm": 0.2688109576702118, "learning_rate": 1e-06, "loss": -0.0637, "step": 1280 }, { "clip_ratio/high_max": 0.0024496471014572307, "clip_ratio/high_mean": 0.0010025371338997502, "clip_ratio/low_mean": 0.0006610128039028496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016635499450785574, "completions/clipped_ratio": 0.2020089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3437.0, "completions/mean_length": 1351.2913818359375, "completions/mean_terminated_length": 656.4769287109375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 2.9915427238261882, "grad_norm": 0.4109784662723541, "learning_rate": 1e-06, "loss": -0.0831, "num_tokens": 188444508.0, "reward": 0.5725446939468384, "reward_std": 0.16671767830848694, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1281 }, { "clip_ratio/high_max": 0.0033538812494953163, "clip_ratio/high_mean": 0.0012586799457494635, "clip_ratio/low_mean": 0.0009316196228610352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002190299579524435, "epoch": 2.993875765529309, "grad_norm": 0.3125744163990021, "learning_rate": 1e-06, "loss": -0.0833, "step": 1282 }, { "clip_ratio/high_max": 0.0035722587344935164, "clip_ratio/high_mean": 0.0013289053640619386, "clip_ratio/low_mean": 0.0011280564067419618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002456961723510176, "epoch": 2.9962088072324295, "grad_norm": 0.2602030038833618, "learning_rate": 1e-06, "loss": -0.0835, "step": 1283 }, { "clip_ratio/high_max": 0.0030436663510045037, "clip_ratio/high_mean": 0.0011226604074181523, "clip_ratio/low_mean": 0.0013129749859217554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024356354406336322, "epoch": 2.9985418489355498, "grad_norm": 0.2609206438064575, "learning_rate": 1e-06, "loss": -0.0836, "step": 1284 }, { "clip_ratio/high_max": 0.0025205490383086726, "clip_ratio/high_mean": 0.0008495939018757781, "clip_ratio/low_mean": 0.000519059556609136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013686535021406598, "completions/clipped_ratio": 0.2243303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3577.0, "completions/mean_length": 1500.4029541015625, "completions/mean_terminated_length": 749.7338256835938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.0023330417031207, "grad_norm": 0.32256942987442017, "learning_rate": 1e-06, "loss": -0.0761, "num_tokens": 189055861.0, "reward": 0.4720982313156128, "reward_std": 0.15582603216171265, "rewards/verify_math_reward/mean": 0.4720982015132904, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 1285 }, { "clip_ratio/high_max": 0.002837576321326196, "clip_ratio/high_mean": 0.0010215287147730123, "clip_ratio/low_mean": 0.0007602082423545653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001781736958946567, "epoch": 3.004666083406241, "grad_norm": 0.23747889697551727, "learning_rate": 1e-06, "loss": -0.0764, "step": 1286 }, { "clip_ratio/high_max": 0.0030404913741222117, "clip_ratio/high_mean": 0.0010446386586409062, "clip_ratio/low_mean": 0.0009466475057706703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001991286117117852, "epoch": 3.0069991251093615, "grad_norm": 0.23243752121925354, "learning_rate": 1e-06, "loss": -0.0765, "step": 1287 }, { "clip_ratio/high_max": 0.0028408026046236046, "clip_ratio/high_mean": 0.0010033117487182608, "clip_ratio/low_mean": 0.001017110549582867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002020422303758096, "epoch": 3.0093321668124817, "grad_norm": 0.2571777403354645, "learning_rate": 1e-06, "loss": -0.0766, "step": 1288 }, { "clip_ratio/high_max": 0.0020180494066153187, "clip_ratio/high_mean": 0.0007340943420786061, "clip_ratio/low_mean": 0.0005057745229350985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012398688468238106, "completions/clipped_ratio": 0.1830357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3731.0, "completions/mean_length": 1326.7890625, "completions/mean_terminated_length": 706.36474609375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 3.0116652085156024, "grad_norm": 0.350341796875, "learning_rate": 1e-06, "loss": -0.0606, "num_tokens": 189655416.0, "reward": 0.5703125, "reward_std": 0.13527169823646545, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1289 }, { "clip_ratio/high_max": 0.0026527890586294234, "clip_ratio/high_mean": 0.0009219521225531935, "clip_ratio/low_mean": 0.0006422939604817657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015642460857634433, "epoch": 3.0139982502187226, "grad_norm": 0.23119445145130157, "learning_rate": 1e-06, "loss": -0.0608, "step": 1290 }, { "clip_ratio/high_max": 0.0021421724777610507, "clip_ratio/high_mean": 0.0008223191680372111, "clip_ratio/low_mean": 0.0007526751051045721, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015749942758702673, "epoch": 3.0163312919218432, "grad_norm": 0.21958255767822266, "learning_rate": 1e-06, "loss": -0.061, "step": 1291 }, { "clip_ratio/high_max": 0.002493561652954668, "clip_ratio/high_mean": 0.0009063939414772904, "clip_ratio/low_mean": 0.0010313017191947438, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019376956952328328, "epoch": 3.0186643336249634, "grad_norm": 0.20196911692619324, "learning_rate": 1e-06, "loss": -0.0611, "step": 1292 }, { "clip_ratio/high_max": 0.002537009750085417, "clip_ratio/high_mean": 0.000978603291514446, "clip_ratio/low_mean": 0.0005903242995373148, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015689275933254976, "completions/clipped_ratio": 0.2075892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3190.0, "completions/mean_length": 1396.829345703125, "completions/mean_terminated_length": 689.7225341796875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 3.020997375328084, "grad_norm": 0.375221848487854, "learning_rate": 1e-06, "loss": -0.087, "num_tokens": 190234783.0, "reward": 0.5256696939468384, "reward_std": 0.19271855056285858, "rewards/verify_math_reward/mean": 0.5256696343421936, "rewards/verify_math_reward/std": 0.4996195435523987, "step": 1293 }, { "clip_ratio/high_max": 0.0033749637150322087, "clip_ratio/high_mean": 0.001275835860724328, "clip_ratio/low_mean": 0.0007964365959196584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002072272458462976, "epoch": 3.0233304170312043, "grad_norm": 0.26271942257881165, "learning_rate": 1e-06, "loss": -0.0873, "step": 1294 }, { "clip_ratio/high_max": 0.003290289307187777, "clip_ratio/high_mean": 0.0012362351626507007, "clip_ratio/low_mean": 0.0009485341597610386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002184769276937004, "epoch": 3.025663458734325, "grad_norm": 0.2825155258178711, "learning_rate": 1e-06, "loss": -0.0875, "step": 1295 }, { "clip_ratio/high_max": 0.0031547732069157064, "clip_ratio/high_mean": 0.0012714094336843118, "clip_ratio/low_mean": 0.0011926287315873196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024640381743665785, "epoch": 3.027996500437445, "grad_norm": 0.2432880997657776, "learning_rate": 1e-06, "loss": -0.0875, "step": 1296 }, { "clip_ratio/high_max": 0.0024913674205890857, "clip_ratio/high_mean": 0.0008098549060377991, "clip_ratio/low_mean": 0.0005295911487337435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013394460802373942, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 1315.8248291015625, "completions/mean_terminated_length": 678.9396362304688, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.030329542140566, "grad_norm": 0.2805899977684021, "learning_rate": 1e-06, "loss": -0.0622, "num_tokens": 190820322.0, "reward": 0.5625, "reward_std": 0.1513993740081787, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 1297 }, { "clip_ratio/high_max": 0.002850104974640999, "clip_ratio/high_mean": 0.0009773864730959758, "clip_ratio/low_mean": 0.0007491319574910449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017265184615098406, "epoch": 3.032662583843686, "grad_norm": 0.238377183675766, "learning_rate": 1e-06, "loss": -0.0624, "step": 1298 }, { "clip_ratio/high_max": 0.003161500957503449, "clip_ratio/high_mean": 0.0009961924697563518, "clip_ratio/low_mean": 0.0009126104705501348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019088029439444654, "epoch": 3.0349956255468067, "grad_norm": 0.228261336684227, "learning_rate": 1e-06, "loss": -0.0626, "step": 1299 }, { "clip_ratio/high_max": 0.002946636486740317, "clip_ratio/high_mean": 0.0009655360481701791, "clip_ratio/low_mean": 0.0010455251995153958, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020110612640564796, "epoch": 3.037328667249927, "grad_norm": 0.22765207290649414, "learning_rate": 1e-06, "loss": -0.0626, "step": 1300 }, { "clip_ratio/high_max": 0.002572289333329536, "clip_ratio/high_mean": 0.0010175790775974747, "clip_ratio/low_mean": 0.0005612465474769124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015788256023370195, "completions/clipped_ratio": 0.1629464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3429.0, "completions/mean_length": 1212.0491943359375, "completions/mean_terminated_length": 650.6400146484375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 3.0396617089530475, "grad_norm": 0.3184731900691986, "learning_rate": 1e-06, "loss": -0.0827, "num_tokens": 191388566.0, "reward": 0.609375, "reward_std": 0.1598842889070511, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 1301 }, { "clip_ratio/high_max": 0.002737827966484474, "clip_ratio/high_mean": 0.001159976873168489, "clip_ratio/low_mean": 0.0006855983087916684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018455751924193464, "epoch": 3.041994750656168, "grad_norm": 0.28312474489212036, "learning_rate": 1e-06, "loss": -0.083, "step": 1302 }, { "clip_ratio/high_max": 0.003003618709044531, "clip_ratio/high_mean": 0.001171711934148334, "clip_ratio/low_mean": 0.0009604678125469945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002132179753971286, "epoch": 3.0443277923592884, "grad_norm": 0.24512238800525665, "learning_rate": 1e-06, "loss": -0.0831, "step": 1303 }, { "clip_ratio/high_max": 0.0028580743819475174, "clip_ratio/high_mean": 0.0011533901815710124, "clip_ratio/low_mean": 0.0010000799777571956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002153470122721046, "epoch": 3.046660834062409, "grad_norm": 0.3053385615348816, "learning_rate": 1e-06, "loss": -0.0831, "step": 1304 }, { "clip_ratio/high_max": 0.0020921457835356705, "clip_ratio/high_mean": 0.0007462162448064191, "clip_ratio/low_mean": 0.0004180340970378893, "clip_ratio/low_min": 1.9913972209906206e-05, "clip_ratio/region_mean": 0.0011642503486655187, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 1298.3717041015625, "completions/mean_terminated_length": 690.1915893554688, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 3.0489938757655293, "grad_norm": 0.2806278169155121, "learning_rate": 1e-06, "loss": -0.058, "num_tokens": 191988699.0, "reward": 0.5792410969734192, "reward_std": 0.1340659260749817, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1305 }, { "clip_ratio/high_max": 0.002359187856200151, "clip_ratio/high_mean": 0.0009155851730611175, "clip_ratio/low_mean": 0.0005765044097643113, "clip_ratio/low_min": 2.3773298380547203e-05, "clip_ratio/region_mean": 0.0014920895664545242, "epoch": 3.05132691746865, "grad_norm": 0.28606978058815, "learning_rate": 1e-06, "loss": -0.0582, "step": 1306 }, { "clip_ratio/high_max": 0.0025636116333771497, "clip_ratio/high_mean": 0.0008925399761210429, "clip_ratio/low_mean": 0.0007514707058362546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001644010713789612, "epoch": 3.05365995917177, "grad_norm": 0.1945660561323166, "learning_rate": 1e-06, "loss": -0.0584, "step": 1307 }, { "clip_ratio/high_max": 0.0023899032203189563, "clip_ratio/high_mean": 0.0008637774208182236, "clip_ratio/low_mean": 0.0008972271461971104, "clip_ratio/low_min": 1.9913972209906206e-05, "clip_ratio/region_mean": 0.001761004503350705, "epoch": 3.055993000874891, "grad_norm": 0.20888541638851166, "learning_rate": 1e-06, "loss": -0.0585, "step": 1308 }, { "clip_ratio/high_max": 0.0028958607435924932, "clip_ratio/high_mean": 0.0011245208734180778, "clip_ratio/low_mean": 0.0005285132112931024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016530340944882482, "completions/clipped_ratio": 0.2321428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 1454.505615234375, "completions/mean_terminated_length": 655.9142456054688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.058326042578011, "grad_norm": 0.30097147822380066, "learning_rate": 1e-06, "loss": -0.0859, "num_tokens": 192532720.0, "reward": 0.5558035969734192, "reward_std": 0.1601438969373703, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 1309 }, { "clip_ratio/high_max": 0.003858191121253185, "clip_ratio/high_mean": 0.0013613364571938291, "clip_ratio/low_mean": 0.0007244455564432428, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020857820054516196, "epoch": 3.0606590842811316, "grad_norm": 0.29711002111434937, "learning_rate": 1e-06, "loss": -0.0862, "step": 1310 }, { "clip_ratio/high_max": 0.0034845851332647726, "clip_ratio/high_mean": 0.0013190685895096976, "clip_ratio/low_mean": 0.0008171836466317473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021362522566050757, "epoch": 3.062992125984252, "grad_norm": 0.2635003328323364, "learning_rate": 1e-06, "loss": -0.0863, "step": 1311 }, { "clip_ratio/high_max": 0.0035168484901078045, "clip_ratio/high_mean": 0.001348177953332197, "clip_ratio/low_mean": 0.0010513546640140703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002399532611889299, "epoch": 3.0653251676873725, "grad_norm": 0.24283172190189362, "learning_rate": 1e-06, "loss": -0.0864, "step": 1312 }, { "clip_ratio/high_max": 0.0025021589008247247, "clip_ratio/high_mean": 0.0010183860122197075, "clip_ratio/low_mean": 0.0004949986587234889, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001513384671852691, "completions/clipped_ratio": 0.1863839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 1248.216552734375, "completions/mean_terminated_length": 595.8436279296875, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 3.0676582093904927, "grad_norm": 0.37111696600914, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 193057898.0, "reward": 0.6037946939468384, "reward_std": 0.1543617695569992, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1313 }, { "clip_ratio/high_max": 0.0034964400474564172, "clip_ratio/high_mean": 0.001275072372663999, "clip_ratio/low_mean": 0.0007646513663530641, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020397236621647608, "epoch": 3.0699912510936134, "grad_norm": 0.2912351191043854, "learning_rate": 1e-06, "loss": -0.0605, "step": 1314 }, { "clip_ratio/high_max": 0.003168079365423182, "clip_ratio/high_mean": 0.0012253887607585057, "clip_ratio/low_mean": 0.0009435755418962799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021689642599085346, "epoch": 3.0723242927967336, "grad_norm": 0.2485773265361786, "learning_rate": 1e-06, "loss": -0.0607, "step": 1315 }, { "clip_ratio/high_max": 0.0031388264724228065, "clip_ratio/high_mean": 0.0012513939327618573, "clip_ratio/low_mean": 0.0010505451255085063, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023019390864646994, "epoch": 3.0746573344998542, "grad_norm": 0.26045605540275574, "learning_rate": 1e-06, "loss": -0.0607, "step": 1316 }, { "clip_ratio/high_max": 0.0022871091787237674, "clip_ratio/high_mean": 0.000889468803507043, "clip_ratio/low_mean": 0.0005702042794837325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014596730943594594, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3486.0, "completions/mean_length": 1329.7109375, "completions/mean_terminated_length": 714.5607299804688, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 3.0769903762029744, "grad_norm": 0.35755103826522827, "learning_rate": 1e-06, "loss": -0.075, "num_tokens": 193677407.0, "reward": 0.5011160969734192, "reward_std": 0.15090125799179077, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1317 }, { "clip_ratio/high_max": 0.00302785503299674, "clip_ratio/high_mean": 0.0011055794529966079, "clip_ratio/low_mean": 0.0008301125717480318, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019356920092832297, "epoch": 3.079323417906095, "grad_norm": 0.23217631876468658, "learning_rate": 1e-06, "loss": -0.0752, "step": 1318 }, { "clip_ratio/high_max": 0.0029139655307517387, "clip_ratio/high_mean": 0.0010585647978587076, "clip_ratio/low_mean": 0.0008228557717302465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018814205541275442, "epoch": 3.0816564596092153, "grad_norm": 0.24049969017505646, "learning_rate": 1e-06, "loss": -0.0753, "step": 1319 }, { "clip_ratio/high_max": 0.002689016386284493, "clip_ratio/high_mean": 0.0010127902824024204, "clip_ratio/low_mean": 0.0010295547544956207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00204234504053602, "epoch": 3.083989501312336, "grad_norm": 0.22247229516506195, "learning_rate": 1e-06, "loss": -0.0754, "step": 1320 }, { "clip_ratio/high_max": 0.0022223807318368927, "clip_ratio/high_mean": 0.000926491069549229, "clip_ratio/low_mean": 0.0005344799919839716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014609710924560204, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3970.0, "completions/mean_length": 1175.2913818359375, "completions/mean_terminated_length": 634.4193115234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.0863225430154566, "grad_norm": 0.3505702614784241, "learning_rate": 1e-06, "loss": -0.0763, "num_tokens": 194243804.0, "reward": 0.637276828289032, "reward_std": 0.16040420532226562, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1321 }, { "clip_ratio/high_max": 0.002552797508542426, "clip_ratio/high_mean": 0.00113710213554441, "clip_ratio/low_mean": 0.0007302283574972535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00186733047303278, "epoch": 3.088655584718577, "grad_norm": 0.27257534861564636, "learning_rate": 1e-06, "loss": -0.0766, "step": 1322 }, { "clip_ratio/high_max": 0.0024929726532718632, "clip_ratio/high_mean": 0.0010018376469815848, "clip_ratio/low_mean": 0.0008190456974261906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00182088326982921, "epoch": 3.0909886264216975, "grad_norm": 0.2139657884836197, "learning_rate": 1e-06, "loss": -0.0767, "step": 1323 }, { "clip_ratio/high_max": 0.0023928607697598636, "clip_ratio/high_mean": 0.001074929530659574, "clip_ratio/low_mean": 0.0009412316030648071, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020161611028015614, "epoch": 3.0933216681248177, "grad_norm": 0.21198685467243195, "learning_rate": 1e-06, "loss": -0.0768, "step": 1324 }, { "clip_ratio/high_max": 0.0020417028936208226, "clip_ratio/high_mean": 0.0007766804992570542, "clip_ratio/low_mean": 0.0005703795477529638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013470600388245657, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3445.0, "completions/mean_length": 1186.0101318359375, "completions/mean_terminated_length": 665.2750244140625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 3.0956547098279383, "grad_norm": 0.2741696238517761, "learning_rate": 1e-06, "loss": -0.058, "num_tokens": 194836629.0, "reward": 0.5915178656578064, "reward_std": 0.15567587316036224, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1325 }, { "clip_ratio/high_max": 0.002530857287638355, "clip_ratio/high_mean": 0.0010029010190919507, "clip_ratio/low_mean": 0.0007089033119882515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017118043251684867, "epoch": 3.0979877515310585, "grad_norm": 0.24755199253559113, "learning_rate": 1e-06, "loss": -0.0582, "step": 1326 }, { "clip_ratio/high_max": 0.0025047990056918934, "clip_ratio/high_mean": 0.0009640709140512627, "clip_ratio/low_mean": 0.0007944907483761199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001758561615133658, "epoch": 3.100320793234179, "grad_norm": 0.32930606603622437, "learning_rate": 1e-06, "loss": -0.0583, "step": 1327 }, { "clip_ratio/high_max": 0.0024294221111631487, "clip_ratio/high_mean": 0.0008861384922056459, "clip_ratio/low_mean": 0.0010210815198661294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019072200229857117, "epoch": 3.1026538349372994, "grad_norm": 0.20770609378814697, "learning_rate": 1e-06, "loss": -0.0584, "step": 1328 }, { "clip_ratio/high_max": 0.0025223410448234063, "clip_ratio/high_mean": 0.0008677193072799128, "clip_ratio/low_mean": 0.0005741742888858425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014418935825233348, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2366.0, "completions/mean_length": 1134.923095703125, "completions/mean_terminated_length": 581.9271850585938, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.10498687664042, "grad_norm": 0.3837740421295166, "learning_rate": 1e-06, "loss": -0.0606, "num_tokens": 195371312.0, "reward": 0.613839328289032, "reward_std": 0.1515427529811859, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1329 }, { "clip_ratio/high_max": 0.0028084589139325544, "clip_ratio/high_mean": 0.0010732359787652967, "clip_ratio/low_mean": 0.0008945150748331798, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001967751064512413, "epoch": 3.1073199183435403, "grad_norm": 0.3349197208881378, "learning_rate": 1e-06, "loss": -0.0609, "step": 1330 }, { "clip_ratio/high_max": 0.0028585824693436734, "clip_ratio/high_mean": 0.0010309245662938338, "clip_ratio/low_mean": 0.0009570273650751915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001987951938644983, "epoch": 3.109652960046661, "grad_norm": 0.2798576354980469, "learning_rate": 1e-06, "loss": -0.0611, "step": 1331 }, { "clip_ratio/high_max": 0.0028027815860696137, "clip_ratio/high_mean": 0.0010115866825799458, "clip_ratio/low_mean": 0.0010797360355354613, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002091322712658439, "epoch": 3.111986001749781, "grad_norm": 0.2401420623064041, "learning_rate": 1e-06, "loss": -0.0612, "step": 1332 }, { "clip_ratio/high_max": 0.002997489478730131, "clip_ratio/high_mean": 0.0010483501500857528, "clip_ratio/low_mean": 0.0006875655117255519, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017359156845486723, "completions/clipped_ratio": 0.2075892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 1388.8985595703125, "completions/mean_terminated_length": 679.7140502929688, "completions/min_length": 205.0, "completions/min_terminated_length": 205.0, "epoch": 3.114319043452902, "grad_norm": 0.2810341417789459, "learning_rate": 1e-06, "loss": -0.0808, "num_tokens": 195954741.0, "reward": 0.5390625, "reward_std": 0.148578941822052, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 1333 }, { "clip_ratio/high_max": 0.0032104247584356926, "clip_ratio/high_mean": 0.0011600665020523593, "clip_ratio/low_mean": 0.000777182036472368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019372485403437167, "epoch": 3.116652085156022, "grad_norm": 0.26112329959869385, "learning_rate": 1e-06, "loss": -0.081, "step": 1334 }, { "clip_ratio/high_max": 0.0038014510937500745, "clip_ratio/high_mean": 0.0013046984822722152, "clip_ratio/low_mean": 0.0011017159140465083, "clip_ratio/low_min": 1.90200844372157e-05, "clip_ratio/region_mean": 0.0024064143872237764, "epoch": 3.1189851268591426, "grad_norm": 0.22285813093185425, "learning_rate": 1e-06, "loss": -0.0812, "step": 1335 }, { "clip_ratio/high_max": 0.0032983868804876693, "clip_ratio/high_mean": 0.0011769670236390084, "clip_ratio/low_mean": 0.0011258692848059582, "clip_ratio/low_min": 9.577076525602024e-06, "clip_ratio/region_mean": 0.002302836270246189, "epoch": 3.121318168562263, "grad_norm": 0.22714033722877502, "learning_rate": 1e-06, "loss": -0.0812, "step": 1336 }, { "clip_ratio/high_max": 0.002241811485873768, "clip_ratio/high_mean": 0.000784858597398852, "clip_ratio/low_mean": 0.0004849584456678713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012698170139628928, "completions/clipped_ratio": 0.1930803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3163.0, "completions/mean_length": 1253.9598388671875, "completions/mean_terminated_length": 573.9142456054688, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 3.1236512102653835, "grad_norm": 0.361855685710907, "learning_rate": 1e-06, "loss": -0.0635, "num_tokens": 196451225.0, "reward": 0.6305803656578064, "reward_std": 0.13080225884914398, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1337 }, { "clip_ratio/high_max": 0.002304529039975023, "clip_ratio/high_mean": 0.0008729561104701133, "clip_ratio/low_mean": 0.0006413904757209821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015143465643632226, "epoch": 3.1259842519685037, "grad_norm": 0.26827290654182434, "learning_rate": 1e-06, "loss": -0.0636, "step": 1338 }, { "clip_ratio/high_max": 0.0027912609184568282, "clip_ratio/high_mean": 0.0009762080953805707, "clip_ratio/low_mean": 0.0008614428024884546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018376508523942903, "epoch": 3.1283172936716244, "grad_norm": 0.21968616545200348, "learning_rate": 1e-06, "loss": -0.0637, "step": 1339 }, { "clip_ratio/high_max": 0.0025011100333358627, "clip_ratio/high_mean": 0.0008304880302603124, "clip_ratio/low_mean": 0.0009597744519851403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001790262478607474, "epoch": 3.130650335374745, "grad_norm": 0.22235487401485443, "learning_rate": 1e-06, "loss": -0.0639, "step": 1340 }, { "clip_ratio/high_max": 0.002429226202366408, "clip_ratio/high_mean": 0.0009180686429317575, "clip_ratio/low_mean": 0.0007322976243813173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016503662773175165, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 1257.1138916015625, "completions/mean_terminated_length": 699.9492797851562, "completions/min_length": 217.0, "completions/min_terminated_length": 217.0, "epoch": 3.1329833770778652, "grad_norm": 0.33593234419822693, "learning_rate": 1e-06, "loss": -0.0803, "num_tokens": 197065823.0, "reward": 0.5133928656578064, "reward_std": 0.1994110643863678, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 1341 }, { "clip_ratio/high_max": 0.0027345400958438404, "clip_ratio/high_mean": 0.001094697949156398, "clip_ratio/low_mean": 0.0009638746232667472, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002058572594251018, "epoch": 3.135316418780986, "grad_norm": 0.3292315602302551, "learning_rate": 1e-06, "loss": -0.0805, "step": 1342 }, { "clip_ratio/high_max": 0.003092270919296425, "clip_ratio/high_mean": 0.0011939085452468134, "clip_ratio/low_mean": 0.0012318274784774985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002425736100121867, "epoch": 3.137649460484106, "grad_norm": 0.24277986586093903, "learning_rate": 1e-06, "loss": -0.0807, "step": 1343 }, { "clip_ratio/high_max": 0.003047489677555859, "clip_ratio/high_mean": 0.00112897347935359, "clip_ratio/low_mean": 0.0013810614182148129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025100349012063816, "epoch": 3.1399825021872267, "grad_norm": 0.22549711167812347, "learning_rate": 1e-06, "loss": -0.0808, "step": 1344 }, { "clip_ratio/high_max": 0.002825524723448325, "clip_ratio/high_mean": 0.0009551564926368883, "clip_ratio/low_mean": 0.0005220908387855161, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014772473223274574, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 1180.544677734375, "completions/mean_terminated_length": 617.6404418945312, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.142315543890347, "grad_norm": 0.3498491644859314, "learning_rate": 1e-06, "loss": -0.0639, "num_tokens": 197616975.0, "reward": 0.6238839626312256, "reward_std": 0.13414262235164642, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 1345 }, { "clip_ratio/high_max": 0.002978554868604988, "clip_ratio/high_mean": 0.0010518930212128907, "clip_ratio/low_mean": 0.0006826523404015461, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001734545348881511, "epoch": 3.1446485855934676, "grad_norm": 0.31992146372795105, "learning_rate": 1e-06, "loss": -0.0641, "step": 1346 }, { "clip_ratio/high_max": 0.0032743963893153705, "clip_ratio/high_mean": 0.0010800384643516736, "clip_ratio/low_mean": 0.0009262666280847043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020063050906173885, "epoch": 3.146981627296588, "grad_norm": 0.28409144282341003, "learning_rate": 1e-06, "loss": -0.0643, "step": 1347 }, { "clip_ratio/high_max": 0.00318326481647091, "clip_ratio/high_mean": 0.0010985936642100569, "clip_ratio/low_mean": 0.0011267528734606458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002225346521299798, "epoch": 3.1493146689997085, "grad_norm": 0.233980193734169, "learning_rate": 1e-06, "loss": -0.0644, "step": 1348 }, { "clip_ratio/high_max": 0.002139932978025172, "clip_ratio/high_mean": 0.0009107685109484009, "clip_ratio/low_mean": 0.000569330952657765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014800994686083868, "completions/clipped_ratio": 0.2287946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3305.0, "completions/mean_length": 1480.96435546875, "completions/mean_terminated_length": 705.15771484375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.1516477107028287, "grad_norm": 0.3200514018535614, "learning_rate": 1e-06, "loss": -0.0999, "num_tokens": 198218823.0, "reward": 0.4810267984867096, "reward_std": 0.15747570991516113, "rewards/verify_math_reward/mean": 0.4810267984867096, "rewards/verify_math_reward/std": 0.49991899728775024, "step": 1349 }, { "clip_ratio/high_max": 0.003589366293454077, "clip_ratio/high_mean": 0.0013790113334835041, "clip_ratio/low_mean": 0.0007359550145338289, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002114966333465418, "epoch": 3.1539807524059493, "grad_norm": 0.2880544364452362, "learning_rate": 1e-06, "loss": -0.1002, "step": 1350 }, { "clip_ratio/high_max": 0.0030352926878549624, "clip_ratio/high_mean": 0.0012039461980748456, "clip_ratio/low_mean": 0.0009654550049162935, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021694011993531603, "epoch": 3.1563137941090695, "grad_norm": 0.22166696190834045, "learning_rate": 1e-06, "loss": -0.1003, "step": 1351 }, { "clip_ratio/high_max": 0.0034358482298557647, "clip_ratio/high_mean": 0.0012475168659875635, "clip_ratio/low_mean": 0.0010006393131334335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022481561463791877, "epoch": 3.15864683581219, "grad_norm": 0.3134857714176178, "learning_rate": 1e-06, "loss": -0.1004, "step": 1352 }, { "clip_ratio/high_max": 0.0018772271469060797, "clip_ratio/high_mean": 0.0006525577919092029, "clip_ratio/low_mean": 0.0005370141607272672, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001189571961731417, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3762.0, "completions/mean_length": 1193.2578125, "completions/mean_terminated_length": 664.790283203125, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 3.1609798775153104, "grad_norm": 0.2849535048007965, "learning_rate": 1e-06, "loss": -0.0671, "num_tokens": 198815262.0, "reward": 0.5714285969734192, "reward_std": 0.14327509701251984, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 1353 }, { "clip_ratio/high_max": 0.0023393456867779605, "clip_ratio/high_mean": 0.0008391679275518982, "clip_ratio/low_mean": 0.0008357585793419275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001674926512350794, "epoch": 3.163312919218431, "grad_norm": 0.20166900753974915, "learning_rate": 1e-06, "loss": -0.0674, "step": 1354 }, { "clip_ratio/high_max": 0.002438125266053248, "clip_ratio/high_mean": 0.0008892816931620473, "clip_ratio/low_mean": 0.0008286072243208764, "clip_ratio/low_min": 7.640586773050018e-06, "clip_ratio/region_mean": 0.0017178889029310085, "epoch": 3.1656459609215517, "grad_norm": 0.22612257301807404, "learning_rate": 1e-06, "loss": -0.0674, "step": 1355 }, { "clip_ratio/high_max": 0.0021777985348307993, "clip_ratio/high_mean": 0.0007701308604737278, "clip_ratio/low_mean": 0.001180693635433272, "clip_ratio/low_min": 2.2921760319150053e-05, "clip_ratio/region_mean": 0.0019508245386532508, "epoch": 3.167979002624672, "grad_norm": 0.19750025868415833, "learning_rate": 1e-06, "loss": -0.0676, "step": 1356 }, { "clip_ratio/high_max": 0.0021138735828571953, "clip_ratio/high_mean": 0.0008293652481370373, "clip_ratio/low_mean": 0.0005782775979241705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014076428415137343, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3315.0, "completions/mean_length": 1272.3973388671875, "completions/mean_terminated_length": 630.3178100585938, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 3.1703120443277926, "grad_norm": 0.3227970600128174, "learning_rate": 1e-06, "loss": -0.0737, "num_tokens": 199360290.0, "reward": 0.5736607313156128, "reward_std": 0.148612841963768, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 1357 }, { "clip_ratio/high_max": 0.0030250952040660195, "clip_ratio/high_mean": 0.0011142691564600682, "clip_ratio/low_mean": 0.0007781955441714672, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018924646901723463, "epoch": 3.1726450860309128, "grad_norm": 0.246768981218338, "learning_rate": 1e-06, "loss": -0.0739, "step": 1358 }, { "clip_ratio/high_max": 0.0028133710147812963, "clip_ratio/high_mean": 0.001045112068823073, "clip_ratio/low_mean": 0.000986219386504672, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002031331448961282, "epoch": 3.1749781277340334, "grad_norm": 0.2945663034915924, "learning_rate": 1e-06, "loss": -0.0742, "step": 1359 }, { "clip_ratio/high_max": 0.0024650287450640462, "clip_ratio/high_mean": 0.0009208102055708878, "clip_ratio/low_mean": 0.0010797965260280762, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00200060671340907, "epoch": 3.1773111694371536, "grad_norm": 0.22011803090572357, "learning_rate": 1e-06, "loss": -0.0742, "step": 1360 }, { "clip_ratio/high_max": 0.0028081150812795386, "clip_ratio/high_mean": 0.0010802435426739976, "clip_ratio/low_mean": 0.0005825233292853227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016627668555884156, "completions/clipped_ratio": 0.1908482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3346.0, "completions/mean_length": 1339.078125, "completions/mean_terminated_length": 688.8248291015625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 3.1796442111402743, "grad_norm": 0.3312619924545288, "learning_rate": 1e-06, "loss": -0.1132, "num_tokens": 199942360.0, "reward": 0.520089328289032, "reward_std": 0.1837480217218399, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1361 }, { "clip_ratio/high_max": 0.0037408599819173105, "clip_ratio/high_mean": 0.001404201702825958, "clip_ratio/low_mean": 0.0008379365717701148, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022421382745960727, "epoch": 3.1819772528433945, "grad_norm": 0.29121676087379456, "learning_rate": 1e-06, "loss": -0.1135, "step": 1362 }, { "clip_ratio/high_max": 0.0035552595218177885, "clip_ratio/high_mean": 0.001331019233475672, "clip_ratio/low_mean": 0.0009413602620043093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002272379490023013, "epoch": 3.184310294546515, "grad_norm": 0.24848061800003052, "learning_rate": 1e-06, "loss": -0.1137, "step": 1363 }, { "clip_ratio/high_max": 0.003499184087559115, "clip_ratio/high_mean": 0.0012648745578189846, "clip_ratio/low_mean": 0.001018966988340253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022838415825390257, "epoch": 3.1866433362496354, "grad_norm": 0.2734673321247101, "learning_rate": 1e-06, "loss": -0.1138, "step": 1364 }, { "clip_ratio/high_max": 0.0033895653759827837, "clip_ratio/high_mean": 0.001157745202363003, "clip_ratio/low_mean": 0.0006268017434649664, "clip_ratio/low_min": 9.797773600439541e-06, "clip_ratio/region_mean": 0.001784546908311313, "completions/clipped_ratio": 0.2142857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3118.0, "completions/mean_length": 1455.985595703125, "completions/mean_terminated_length": 735.9815673828125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 3.188976377952756, "grad_norm": 0.3164910078048706, "learning_rate": 1e-06, "loss": -0.1014, "num_tokens": 200553963.0, "reward": 0.504464328289032, "reward_std": 0.18614476919174194, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 1365 }, { "clip_ratio/high_max": 0.0032233367674052715, "clip_ratio/high_mean": 0.0011809509014710784, "clip_ratio/low_mean": 0.0009057749102794332, "clip_ratio/low_min": 1.9595547200879082e-05, "clip_ratio/region_mean": 0.00208672575536184, "epoch": 3.1913094196558762, "grad_norm": 0.28135693073272705, "learning_rate": 1e-06, "loss": -0.1016, "step": 1366 }, { "clip_ratio/high_max": 0.0033793382317526266, "clip_ratio/high_mean": 0.0012781320911017247, "clip_ratio/low_mean": 0.0011014879482900142, "clip_ratio/low_min": 3.9191094401758164e-05, "clip_ratio/region_mean": 0.002379620047577191, "epoch": 3.193642461358997, "grad_norm": 0.23779508471488953, "learning_rate": 1e-06, "loss": -0.1018, "step": 1367 }, { "clip_ratio/high_max": 0.0032954135313048027, "clip_ratio/high_mean": 0.0012228189552843105, "clip_ratio/low_mean": 0.00129534852476354, "clip_ratio/low_min": 1.9595547200879082e-05, "clip_ratio/region_mean": 0.002518167566449847, "epoch": 3.195975503062117, "grad_norm": 0.24185238778591156, "learning_rate": 1e-06, "loss": -0.1019, "step": 1368 }, { "clip_ratio/high_max": 0.0022182962638908066, "clip_ratio/high_mean": 0.0008027746625884902, "clip_ratio/low_mean": 0.0006631616515733185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001465936285967473, "completions/clipped_ratio": 0.2008928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3785.0, "completions/mean_length": 1357.7545166015625, "completions/mean_terminated_length": 669.3687133789062, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.1983085447652377, "grad_norm": 0.30065393447875977, "learning_rate": 1e-06, "loss": -0.0765, "num_tokens": 201119335.0, "reward": 0.5546875, "reward_std": 0.14902400970458984, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 1369 }, { "clip_ratio/high_max": 0.002445177990011871, "clip_ratio/high_mean": 0.0009415408603672404, "clip_ratio/low_mean": 0.0008758692583796801, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00181741012056591, "epoch": 3.200641586468358, "grad_norm": 0.32187435030937195, "learning_rate": 1e-06, "loss": -0.0765, "step": 1370 }, { "clip_ratio/high_max": 0.0026672029853216372, "clip_ratio/high_mean": 0.0010266938543281867, "clip_ratio/low_mean": 0.001034640639772988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020613344968296587, "epoch": 3.2029746281714786, "grad_norm": 0.26315632462501526, "learning_rate": 1e-06, "loss": -0.0768, "step": 1371 }, { "clip_ratio/high_max": 0.0024598382879048586, "clip_ratio/high_mean": 0.001008868899589288, "clip_ratio/low_mean": 0.0014022009199834429, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024110698213917203, "epoch": 3.205307669874599, "grad_norm": 0.23852814733982086, "learning_rate": 1e-06, "loss": -0.0769, "step": 1372 }, { "clip_ratio/high_max": 0.0026599104603519663, "clip_ratio/high_mean": 0.001088276407244848, "clip_ratio/low_mean": 0.00048437131954415236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015726477358839475, "completions/clipped_ratio": 0.2120535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3145.0, "completions/mean_length": 1406.2344970703125, "completions/mean_terminated_length": 682.3597412109375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 3.2076407115777195, "grad_norm": 0.3115938603878021, "learning_rate": 1e-06, "loss": -0.0721, "num_tokens": 201689641.0, "reward": 0.5022321939468384, "reward_std": 0.16660960018634796, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 1373 }, { "clip_ratio/high_max": 0.002917425306804944, "clip_ratio/high_mean": 0.0012557908485177904, "clip_ratio/low_mean": 0.0006526048437081045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019083957158727571, "epoch": 3.20997375328084, "grad_norm": 0.33406734466552734, "learning_rate": 1e-06, "loss": -0.0724, "step": 1374 }, { "clip_ratio/high_max": 0.00291656961053377, "clip_ratio/high_mean": 0.001195302145788446, "clip_ratio/low_mean": 0.0007796578788656916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019749599960050546, "epoch": 3.2123067949839603, "grad_norm": 0.26957711577415466, "learning_rate": 1e-06, "loss": -0.0725, "step": 1375 }, { "clip_ratio/high_max": 0.0030238851468311623, "clip_ratio/high_mean": 0.001208049950946588, "clip_ratio/low_mean": 0.0008903388452381478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002098388780723326, "epoch": 3.214639836687081, "grad_norm": 0.25391343235969543, "learning_rate": 1e-06, "loss": -0.0725, "step": 1376 }, { "clip_ratio/high_max": 0.0023889686999609694, "clip_ratio/high_mean": 0.000987817853456363, "clip_ratio/low_mean": 0.0006864479155410663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016742657790018711, "completions/clipped_ratio": 0.2209821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3866.0, "completions/mean_length": 1485.3751220703125, "completions/mean_terminated_length": 744.8252563476562, "completions/min_length": 206.0, "completions/min_terminated_length": 206.0, "epoch": 3.216972878390201, "grad_norm": 0.2967849373817444, "learning_rate": 1e-06, "loss": -0.098, "num_tokens": 202304313.0, "reward": 0.4765625298023224, "reward_std": 0.1740477830171585, "rewards/verify_math_reward/mean": 0.4765625, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1377 }, { "clip_ratio/high_max": 0.0024445352246402763, "clip_ratio/high_mean": 0.0010376444915891625, "clip_ratio/low_mean": 0.0009159694309346378, "clip_ratio/low_min": 1.9647908629849553e-05, "clip_ratio/region_mean": 0.001953613871592097, "epoch": 3.219305920093322, "grad_norm": 0.30563458800315857, "learning_rate": 1e-06, "loss": -0.0981, "step": 1378 }, { "clip_ratio/high_max": 0.0023477618451579474, "clip_ratio/high_mean": 0.0010650950243871193, "clip_ratio/low_mean": 0.0010782867429952603, "clip_ratio/low_min": 1.60503332153894e-05, "clip_ratio/region_mean": 0.0021433818037621677, "epoch": 3.221638961796442, "grad_norm": 0.263799786567688, "learning_rate": 1e-06, "loss": -0.0984, "step": 1379 }, { "clip_ratio/high_max": 0.002721808406931814, "clip_ratio/high_mean": 0.0011329717963235453, "clip_ratio/low_mean": 0.0011568615827854956, "clip_ratio/low_min": 5.0730519433273e-05, "clip_ratio/region_mean": 0.0022898333336343057, "epoch": 3.2239720034995627, "grad_norm": 0.23858009278774261, "learning_rate": 1e-06, "loss": -0.0985, "step": 1380 }, { "clip_ratio/high_max": 0.0021201352865318768, "clip_ratio/high_mean": 0.000750652090573567, "clip_ratio/low_mean": 0.0005253635126791778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001276015624171123, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3517.0, "completions/mean_length": 1212.1685791015625, "completions/mean_terminated_length": 622.9986572265625, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 3.226305045202683, "grad_norm": 0.316988080739975, "learning_rate": 1e-06, "loss": -0.0555, "num_tokens": 202851776.0, "reward": 0.621651828289032, "reward_std": 0.1516508162021637, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 1381 }, { "clip_ratio/high_max": 0.0027541725648916326, "clip_ratio/high_mean": 0.0009396336899953894, "clip_ratio/low_mean": 0.0006619510313612409, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016015847795642912, "epoch": 3.2286380869058036, "grad_norm": 0.25705021619796753, "learning_rate": 1e-06, "loss": -0.0557, "step": 1382 }, { "clip_ratio/high_max": 0.002651364127814304, "clip_ratio/high_mean": 0.000985535503787105, "clip_ratio/low_mean": 0.0008871033442119369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018726388880168088, "epoch": 3.2309711286089238, "grad_norm": 0.31974393129348755, "learning_rate": 1e-06, "loss": -0.0558, "step": 1383 }, { "clip_ratio/high_max": 0.0023458923678845167, "clip_ratio/high_mean": 0.0008305495575768873, "clip_ratio/low_mean": 0.0010812032996909693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019117528572678566, "epoch": 3.2333041703120444, "grad_norm": 0.2726139724254608, "learning_rate": 1e-06, "loss": -0.0558, "step": 1384 }, { "clip_ratio/high_max": 0.0024199914041673765, "clip_ratio/high_mean": 0.0009865360079857055, "clip_ratio/low_mean": 0.0006013246538714156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015878607009653933, "completions/clipped_ratio": 0.1875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3653.0, "completions/mean_length": 1295.06591796875, "completions/mean_terminated_length": 648.6964721679688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 3.2356372120151646, "grad_norm": 0.31969305872917175, "learning_rate": 1e-06, "loss": -0.0838, "num_tokens": 203415259.0, "reward": 0.5569196939468384, "reward_std": 0.15097934007644653, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 1385 }, { "clip_ratio/high_max": 0.0032785937219159678, "clip_ratio/high_mean": 0.0013131868727214169, "clip_ratio/low_mean": 0.000833667117603909, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021468539998750202, "epoch": 3.2379702537182853, "grad_norm": 0.25600868463516235, "learning_rate": 1e-06, "loss": -0.084, "step": 1386 }, { "clip_ratio/high_max": 0.002963113114674343, "clip_ratio/high_mean": 0.0012603523737197975, "clip_ratio/low_mean": 0.0008865321888151811, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002146884515241254, "epoch": 3.2403032954214055, "grad_norm": 0.22946487367153168, "learning_rate": 1e-06, "loss": -0.0842, "step": 1387 }, { "clip_ratio/high_max": 0.002728587744059041, "clip_ratio/high_mean": 0.001143316458183108, "clip_ratio/low_mean": 0.0010875751013372792, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002230891514045652, "epoch": 3.242636337124526, "grad_norm": 0.20421616733074188, "learning_rate": 1e-06, "loss": -0.0842, "step": 1388 }, { "clip_ratio/high_max": 0.0031228039442794397, "clip_ratio/high_mean": 0.0011625596416706685, "clip_ratio/low_mean": 0.000449032223514223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016115919133881107, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3356.0, "completions/mean_length": 1118.03466796875, "completions/mean_terminated_length": 648.6395263671875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 3.2449693788276464, "grad_norm": 0.3249204456806183, "learning_rate": 1e-06, "loss": -0.0685, "num_tokens": 204008506.0, "reward": 0.5390625, "reward_std": 0.18876947462558746, "rewards/verify_math_reward/mean": 0.5390625, "rewards/verify_math_reward/std": 0.4987502098083496, "step": 1389 }, { "clip_ratio/high_max": 0.0030883293802617118, "clip_ratio/high_mean": 0.0012925277842441574, "clip_ratio/low_mean": 0.0007287934840860544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002021321233769413, "epoch": 3.247302420530767, "grad_norm": 0.27527910470962524, "learning_rate": 1e-06, "loss": -0.0688, "step": 1390 }, { "clip_ratio/high_max": 0.0035688294083229266, "clip_ratio/high_mean": 0.0013115837173245382, "clip_ratio/low_mean": 0.000894385588253499, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002205969278293196, "epoch": 3.249635462233887, "grad_norm": 0.22996072471141815, "learning_rate": 1e-06, "loss": -0.0689, "step": 1391 }, { "clip_ratio/high_max": 0.0032063006656244397, "clip_ratio/high_mean": 0.0012513813744590152, "clip_ratio/low_mean": 0.0010004925225075567, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022518738987855613, "epoch": 3.251968503937008, "grad_norm": 0.2599335014820099, "learning_rate": 1e-06, "loss": -0.069, "step": 1392 }, { "clip_ratio/high_max": 0.0017107321291405242, "clip_ratio/high_mean": 0.0005945113161942572, "clip_ratio/low_mean": 0.0004417756699695019, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010362869943492115, "completions/clipped_ratio": 0.2522321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 1552.5357666015625, "completions/mean_terminated_length": 694.591064453125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 3.2543015456401285, "grad_norm": 0.2847426235675812, "learning_rate": 1e-06, "loss": -0.0712, "num_tokens": 204568730.0, "reward": 0.4676339626312256, "reward_std": 0.12531113624572754, "rewards/verify_math_reward/mean": 0.4676339328289032, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1393 }, { "clip_ratio/high_max": 0.0022250008332775906, "clip_ratio/high_mean": 0.0007966230305100908, "clip_ratio/low_mean": 0.0006261913163143618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014228143299988005, "epoch": 3.2566345873432487, "grad_norm": 0.25943008065223694, "learning_rate": 1e-06, "loss": -0.0715, "step": 1394 }, { "clip_ratio/high_max": 0.0021694751958420966, "clip_ratio/high_mean": 0.0007335646751016611, "clip_ratio/low_mean": 0.0007502906291847466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014838552997389343, "epoch": 3.2589676290463694, "grad_norm": 0.22387363016605377, "learning_rate": 1e-06, "loss": -0.0715, "step": 1395 }, { "clip_ratio/high_max": 0.00225412361578492, "clip_ratio/high_mean": 0.0006936196227798064, "clip_ratio/low_mean": 0.0008708087239028828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015644283539586468, "epoch": 3.2613006707494896, "grad_norm": 0.22405408322811127, "learning_rate": 1e-06, "loss": -0.0716, "step": 1396 }, { "clip_ratio/high_max": 0.002606538255349733, "clip_ratio/high_mean": 0.0008888039737939835, "clip_ratio/low_mean": 0.0007069700986903626, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015957740324665792, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3575.0, "completions/mean_length": 1223.63623046875, "completions/mean_terminated_length": 669.0518798828125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.2636337124526102, "grad_norm": 0.3812796473503113, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 205163652.0, "reward": 0.515625, "reward_std": 0.16153399646282196, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1397 }, { "clip_ratio/high_max": 0.0026634673340595327, "clip_ratio/high_mean": 0.001081415710359579, "clip_ratio/low_mean": 0.0009601121455489192, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002041527899564244, "epoch": 3.2659667541557305, "grad_norm": 0.31780505180358887, "learning_rate": 1e-06, "loss": -0.0574, "step": 1398 }, { "clip_ratio/high_max": 0.0027923797897528857, "clip_ratio/high_mean": 0.0010067439689009916, "clip_ratio/low_mean": 0.001261196823179489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002267940857564099, "epoch": 3.268299795858851, "grad_norm": 0.28611859679222107, "learning_rate": 1e-06, "loss": -0.0576, "step": 1399 }, { "clip_ratio/high_max": 0.002342934902117122, "clip_ratio/high_mean": 0.0009197864255838795, "clip_ratio/low_mean": 0.0013291147406562231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002248901131679304, "epoch": 3.2706328375619713, "grad_norm": 0.232257679104805, "learning_rate": 1e-06, "loss": -0.0577, "step": 1400 }, { "clip_ratio/high_max": 0.0023585003764310386, "clip_ratio/high_mean": 0.000794408446381567, "clip_ratio/low_mean": 0.0005302667259456939, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013246751987026073, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 1287.828125, "completions/mean_terminated_length": 649.257568359375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.272965879265092, "grad_norm": 0.3569932281970978, "learning_rate": 1e-06, "loss": -0.0867, "num_tokens": 205724266.0, "reward": 0.4988839626312256, "reward_std": 0.13985875248908997, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1401 }, { "clip_ratio/high_max": 0.003075787186389789, "clip_ratio/high_mean": 0.0009645216541684931, "clip_ratio/low_mean": 0.0008519003768014954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018164220455219038, "epoch": 3.275298920968212, "grad_norm": 0.29467490315437317, "learning_rate": 1e-06, "loss": -0.087, "step": 1402 }, { "clip_ratio/high_max": 0.003196276338712778, "clip_ratio/high_mean": 0.0009206524755427381, "clip_ratio/low_mean": 0.0010368306466261856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019574831567297224, "epoch": 3.277631962671333, "grad_norm": 0.2431877851486206, "learning_rate": 1e-06, "loss": -0.0872, "step": 1403 }, { "clip_ratio/high_max": 0.0028092034262954257, "clip_ratio/high_mean": 0.0009348743496957468, "clip_ratio/low_mean": 0.0012063762533216504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021412506757769734, "epoch": 3.279965004374453, "grad_norm": 0.22555813193321228, "learning_rate": 1e-06, "loss": -0.0873, "step": 1404 }, { "clip_ratio/high_max": 0.002152534943888895, "clip_ratio/high_mean": 0.0008175220791599713, "clip_ratio/low_mean": 0.0006683234432784957, "clip_ratio/low_min": 7.524680768256076e-06, "clip_ratio/region_mean": 0.0014858455178909935, "completions/clipped_ratio": 0.2198660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 1495.5726318359375, "completions/mean_terminated_length": 762.6909790039062, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.2822980460775737, "grad_norm": 0.29808303713798523, "learning_rate": 1e-06, "loss": -0.1224, "num_tokens": 206352035.0, "reward": 0.4966517984867096, "reward_std": 0.18396486341953278, "rewards/verify_math_reward/mean": 0.4966517984867096, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 1405 }, { "clip_ratio/high_max": 0.002606508096505422, "clip_ratio/high_mean": 0.0010587483557173982, "clip_ratio/low_mean": 0.0008699759018782061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019287242466816679, "epoch": 3.284631087780694, "grad_norm": 0.2458135187625885, "learning_rate": 1e-06, "loss": -0.1226, "step": 1406 }, { "clip_ratio/high_max": 0.00274065002304269, "clip_ratio/high_mean": 0.0010915798920905218, "clip_ratio/low_mean": 0.0010510491247259779, "clip_ratio/low_min": 1.3536929145629983e-05, "clip_ratio/region_mean": 0.002142629018635489, "epoch": 3.2869641294838146, "grad_norm": 0.2649960219860077, "learning_rate": 1e-06, "loss": -0.1227, "step": 1407 }, { "clip_ratio/high_max": 0.00254956914432114, "clip_ratio/high_mean": 0.0010358562685723882, "clip_ratio/low_mean": 0.0011377867303963285, "clip_ratio/low_min": 1.3536929145629983e-05, "clip_ratio/region_mean": 0.0021736430426244624, "epoch": 3.289297171186935, "grad_norm": 0.22067445516586304, "learning_rate": 1e-06, "loss": -0.1228, "step": 1408 }, { "clip_ratio/high_max": 0.002485864573827712, "clip_ratio/high_mean": 0.0009715916239656508, "clip_ratio/low_mean": 0.0006173867241159314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015889783389866352, "completions/clipped_ratio": 0.1975446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 1307.6663818359375, "completions/mean_terminated_length": 621.24755859375, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 3.2916302128900554, "grad_norm": 0.4216117858886719, "learning_rate": 1e-06, "loss": -0.0581, "num_tokens": 206892232.0, "reward": 0.5803571939468384, "reward_std": 0.14166000485420227, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 1409 }, { "clip_ratio/high_max": 0.0030081197182880715, "clip_ratio/high_mean": 0.0011857901899929857, "clip_ratio/low_mean": 0.0008584561774114263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020442463573999703, "epoch": 3.2939632545931756, "grad_norm": 0.2833123505115509, "learning_rate": 1e-06, "loss": -0.0584, "step": 1410 }, { "clip_ratio/high_max": 0.0028013972550979815, "clip_ratio/high_mean": 0.001152902543253731, "clip_ratio/low_mean": 0.0011308564498904161, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022837589713162743, "epoch": 3.2962962962962963, "grad_norm": 0.28059399127960205, "learning_rate": 1e-06, "loss": -0.0587, "step": 1411 }, { "clip_ratio/high_max": 0.0031393519675475545, "clip_ratio/high_mean": 0.0010532768737903098, "clip_ratio/low_mean": 0.0012799748437828384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002333251715754159, "epoch": 3.298629337999417, "grad_norm": 0.3258727490901947, "learning_rate": 1e-06, "loss": -0.0587, "step": 1412 }, { "clip_ratio/high_max": 0.0029762344929622486, "clip_ratio/high_mean": 0.0011315756164549384, "clip_ratio/low_mean": 0.000596388447775098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017279640451306477, "completions/clipped_ratio": 0.1986607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3880.0, "completions/mean_length": 1383.8360595703125, "completions/mean_terminated_length": 711.4609985351562, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 3.300962379702537, "grad_norm": 0.3155386447906494, "learning_rate": 1e-06, "loss": -0.0868, "num_tokens": 207493981.0, "reward": 0.4899553656578064, "reward_std": 0.1817513257265091, "rewards/verify_math_reward/mean": 0.4899553656578064, "rewards/verify_math_reward/std": 0.5001782774925232, "step": 1413 }, { "clip_ratio/high_max": 0.003695404317113571, "clip_ratio/high_mean": 0.0013442114504869096, "clip_ratio/low_mean": 0.0008084241362666944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021526355121750385, "epoch": 3.303295421405658, "grad_norm": 0.3200008273124695, "learning_rate": 1e-06, "loss": -0.087, "step": 1414 }, { "clip_ratio/high_max": 0.00346468755742535, "clip_ratio/high_mean": 0.0013513403246179223, "clip_ratio/low_mean": 0.000933639259528718, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022849796223454177, "epoch": 3.305628463108778, "grad_norm": 0.24863432347774506, "learning_rate": 1e-06, "loss": -0.0872, "step": 1415 }, { "clip_ratio/high_max": 0.0036600856838049367, "clip_ratio/high_mean": 0.0013390149251790717, "clip_ratio/low_mean": 0.0011202372443221975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0024592521294835024, "epoch": 3.3079615048118987, "grad_norm": 0.23993004858493805, "learning_rate": 1e-06, "loss": -0.0872, "step": 1416 }, { "clip_ratio/high_max": 0.0019119550051982515, "clip_ratio/high_mean": 0.0006607695031561889, "clip_ratio/low_mean": 0.0004249778171470098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00108574733894784, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3274.0, "completions/mean_length": 1082.376220703125, "completions/mean_terminated_length": 584.677490234375, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 3.310294546515019, "grad_norm": 0.30247485637664795, "learning_rate": 1e-06, "loss": -0.0402, "num_tokens": 208020966.0, "reward": 0.6361607313156128, "reward_std": 0.12267616391181946, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1417 }, { "clip_ratio/high_max": 0.002196392902988009, "clip_ratio/high_mean": 0.0008674347182022757, "clip_ratio/low_mean": 0.0006404634668797371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00150789816325414, "epoch": 3.3126275882181395, "grad_norm": 0.27442285418510437, "learning_rate": 1e-06, "loss": -0.0405, "step": 1418 }, { "clip_ratio/high_max": 0.0025767409315449186, "clip_ratio/high_mean": 0.0009292753056797665, "clip_ratio/low_mean": 0.0008339601104125904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017632354065426625, "epoch": 3.3149606299212597, "grad_norm": 0.2677547037601471, "learning_rate": 1e-06, "loss": -0.0406, "step": 1419 }, { "clip_ratio/high_max": 0.0021459552117448766, "clip_ratio/high_mean": 0.0007863909440857242, "clip_ratio/low_mean": 0.0009221609288943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017085518957173917, "epoch": 3.3172936716243804, "grad_norm": 0.28091907501220703, "learning_rate": 1e-06, "loss": -0.0406, "step": 1420 }, { "clip_ratio/high_max": 0.0026617609764798544, "clip_ratio/high_mean": 0.001080154237570241, "clip_ratio/low_mean": 0.0006827125680501922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017628667847020552, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 1220.9085693359375, "completions/mean_terminated_length": 683.9708862304688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.3196267133275006, "grad_norm": 0.31796377897262573, "learning_rate": 1e-06, "loss": -0.0813, "num_tokens": 208628124.0, "reward": 0.5859375, "reward_std": 0.1989564299583435, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 1421 }, { "clip_ratio/high_max": 0.003174099554598797, "clip_ratio/high_mean": 0.001284394380490994, "clip_ratio/low_mean": 0.000976650461780082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022610448650084436, "epoch": 3.3219597550306212, "grad_norm": 0.2940112054347992, "learning_rate": 1e-06, "loss": -0.0816, "step": 1422 }, { "clip_ratio/high_max": 0.003210231261618901, "clip_ratio/high_mean": 0.0013108856364851817, "clip_ratio/low_mean": 0.001243566661287332, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002554452257754747, "epoch": 3.3242927967337415, "grad_norm": 0.36122965812683105, "learning_rate": 1e-06, "loss": -0.0819, "step": 1423 }, { "clip_ratio/high_max": 0.0031171913651633076, "clip_ratio/high_mean": 0.00118163075967459, "clip_ratio/low_mean": 0.0014060313278605463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002587662158475723, "epoch": 3.326625838436862, "grad_norm": 0.2434595227241516, "learning_rate": 1e-06, "loss": -0.0819, "step": 1424 }, { "clip_ratio/high_max": 0.0022868446685606614, "clip_ratio/high_mean": 0.0009595526171324309, "clip_ratio/low_mean": 0.0006868742293590913, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016464268555864692, "completions/clipped_ratio": 0.2287946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3040.0, "completions/mean_length": 1486.864990234375, "completions/mean_terminated_length": 712.8089599609375, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 3.3289588801399823, "grad_norm": 0.33154115080833435, "learning_rate": 1e-06, "loss": -0.1223, "num_tokens": 209217779.0, "reward": 0.5345982313156128, "reward_std": 0.17682796716690063, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1425 }, { "clip_ratio/high_max": 0.0027643133798846975, "clip_ratio/high_mean": 0.0012076245911885053, "clip_ratio/low_mean": 0.0008671995437907754, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020748240931425244, "epoch": 3.331291921843103, "grad_norm": 0.2755907475948334, "learning_rate": 1e-06, "loss": -0.1226, "step": 1426 }, { "clip_ratio/high_max": 0.0028445334028219804, "clip_ratio/high_mean": 0.001203192128741648, "clip_ratio/low_mean": 0.000988325117759814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021915172765147872, "epoch": 3.3336249635462236, "grad_norm": 0.2327805757522583, "learning_rate": 1e-06, "loss": -0.1227, "step": 1427 }, { "clip_ratio/high_max": 0.0022473930803244, "clip_ratio/high_mean": 0.0010317661872250028, "clip_ratio/low_mean": 0.0011683286265906645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002200094830186572, "epoch": 3.335958005249344, "grad_norm": 0.3731497824192047, "learning_rate": 1e-06, "loss": -0.1228, "step": 1428 }, { "clip_ratio/high_max": 0.002318192884558812, "clip_ratio/high_mean": 0.0009074763056560187, "clip_ratio/low_mean": 0.0005459625608636998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014534388537867926, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 1155.704345703125, "completions/mean_terminated_length": 670.11572265625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 3.338291046952464, "grad_norm": 0.29542016983032227, "learning_rate": 1e-06, "loss": -0.0759, "num_tokens": 209816866.0, "reward": 0.6116071939468384, "reward_std": 0.1679650843143463, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 1429 }, { "clip_ratio/high_max": 0.0026791319833137095, "clip_ratio/high_mean": 0.0010337810726923635, "clip_ratio/low_mean": 0.0007834240414013038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018172051168221515, "epoch": 3.3406240886555847, "grad_norm": 0.3144708573818207, "learning_rate": 1e-06, "loss": -0.0762, "step": 1430 }, { "clip_ratio/high_max": 0.0030275606841314584, "clip_ratio/high_mean": 0.0010740918824012624, "clip_ratio/low_mean": 0.00085768296776223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019317748447065242, "epoch": 3.3429571303587053, "grad_norm": 0.22165705263614655, "learning_rate": 1e-06, "loss": -0.0764, "step": 1431 }, { "clip_ratio/high_max": 0.0026318026721128263, "clip_ratio/high_mean": 0.0010720016362029128, "clip_ratio/low_mean": 0.0010904667506110854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002162468394089956, "epoch": 3.3452901720618256, "grad_norm": 0.23472611606121063, "learning_rate": 1e-06, "loss": -0.0764, "step": 1432 }, { "clip_ratio/high_max": 0.0021777022557216696, "clip_ratio/high_mean": 0.0008209644402086269, "clip_ratio/low_mean": 0.00048828055128069536, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013092449844407383, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 1342.7098388671875, "completions/mean_terminated_length": 721.2421875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 3.347623213764946, "grad_norm": 0.3252272307872772, "learning_rate": 1e-06, "loss": -0.0597, "num_tokens": 210438814.0, "reward": 0.559151828289032, "reward_std": 0.1498156636953354, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1433 }, { "clip_ratio/high_max": 0.0021948493376839906, "clip_ratio/high_mean": 0.0007772110275254818, "clip_ratio/low_mean": 0.0007795270157657797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015567380032734945, "epoch": 3.3499562554680664, "grad_norm": 0.2555447816848755, "learning_rate": 1e-06, "loss": -0.0599, "step": 1434 }, { "clip_ratio/high_max": 0.0026419895366416313, "clip_ratio/high_mean": 0.0009608177933841944, "clip_ratio/low_mean": 0.0008887026015145238, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018495203839847818, "epoch": 3.352289297171187, "grad_norm": 0.2451346218585968, "learning_rate": 1e-06, "loss": -0.0601, "step": 1435 }, { "clip_ratio/high_max": 0.0022972883889451623, "clip_ratio/high_mean": 0.000838000169096631, "clip_ratio/low_mean": 0.0010169958750338992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018549960732343607, "epoch": 3.3546223388743073, "grad_norm": 0.19539831578731537, "learning_rate": 1e-06, "loss": -0.0601, "step": 1436 }, { "clip_ratio/high_max": 0.0023833828745409846, "clip_ratio/high_mean": 0.0009572358248988166, "clip_ratio/low_mean": 0.000545118511581677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015023543310235254, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 1180.9810791015625, "completions/mean_terminated_length": 636.5867919921875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 3.356955380577428, "grad_norm": 0.2828822731971741, "learning_rate": 1e-06, "loss": -0.0351, "num_tokens": 211010349.0, "reward": 0.5691964626312256, "reward_std": 0.15639057755470276, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1437 }, { "clip_ratio/high_max": 0.002974518807604909, "clip_ratio/high_mean": 0.0011843534666695632, "clip_ratio/low_mean": 0.0007223030552268028, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019066565146204084, "epoch": 3.359288422280548, "grad_norm": 0.2754281759262085, "learning_rate": 1e-06, "loss": -0.0353, "step": 1438 }, { "clip_ratio/high_max": 0.0027547199715627357, "clip_ratio/high_mean": 0.0010415829019621015, "clip_ratio/low_mean": 0.0009017273655445024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001943310235219542, "epoch": 3.361621463983669, "grad_norm": 0.24657297134399414, "learning_rate": 1e-06, "loss": -0.0355, "step": 1439 }, { "clip_ratio/high_max": 0.0024943129246821627, "clip_ratio/high_mean": 0.0010055546554212924, "clip_ratio/low_mean": 0.0010134600152014173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020190146860841196, "epoch": 3.363954505686789, "grad_norm": 0.24397654831409454, "learning_rate": 1e-06, "loss": -0.0355, "step": 1440 }, { "clip_ratio/high_max": 0.002338218291697558, "clip_ratio/high_mean": 0.0008907749215723015, "clip_ratio/low_mean": 0.0007159109391068341, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016066858697740827, "completions/clipped_ratio": 0.1852678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2982.0, "completions/mean_length": 1248.5023193359375, "completions/mean_terminated_length": 600.9890747070312, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 3.3662875473899097, "grad_norm": 0.5020262598991394, "learning_rate": 1e-06, "loss": -0.0937, "num_tokens": 211549791.0, "reward": 0.5837053656578064, "reward_std": 0.16735847294330597, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 1441 }, { "clip_ratio/high_max": 0.003099871944868937, "clip_ratio/high_mean": 0.0011718298719642917, "clip_ratio/low_mean": 0.0009420182432222646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002113848146109376, "epoch": 3.36862058909303, "grad_norm": 0.2910059690475464, "learning_rate": 1e-06, "loss": -0.0941, "step": 1442 }, { "clip_ratio/high_max": 0.0036351317903609015, "clip_ratio/high_mean": 0.0012362859579297947, "clip_ratio/low_mean": 0.0012269529543118551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002463238903146703, "epoch": 3.3709536307961505, "grad_norm": 0.25305771827697754, "learning_rate": 1e-06, "loss": -0.0944, "step": 1443 }, { "clip_ratio/high_max": 0.00290341027721297, "clip_ratio/high_mean": 0.0010353881625633221, "clip_ratio/low_mean": 0.001412170080584474, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002447558203130029, "epoch": 3.3732866724992707, "grad_norm": 0.2992576062679291, "learning_rate": 1e-06, "loss": -0.0943, "step": 1444 }, { "clip_ratio/high_max": 0.0026444131799507886, "clip_ratio/high_mean": 0.0012394175937515683, "clip_ratio/low_mean": 0.0006469405016105156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018863580698962323, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2743.0, "completions/mean_length": 1167.6273193359375, "completions/mean_terminated_length": 606.875, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 3.3756197142023914, "grad_norm": 0.3741614520549774, "learning_rate": 1e-06, "loss": -0.0885, "num_tokens": 212092313.0, "reward": 0.6037946939468384, "reward_std": 0.19392429292201996, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1445 }, { "clip_ratio/high_max": 0.0033751697483239695, "clip_ratio/high_mean": 0.0014848263417661656, "clip_ratio/low_mean": 0.0009012938253363245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00238612021348672, "epoch": 3.377952755905512, "grad_norm": 0.4091361463069916, "learning_rate": 1e-06, "loss": -0.0888, "step": 1446 }, { "clip_ratio/high_max": 0.002985111394082196, "clip_ratio/high_mean": 0.0014381614200829063, "clip_ratio/low_mean": 0.0011305916850687936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025687531087896787, "epoch": 3.3802857976086322, "grad_norm": 0.27056244015693665, "learning_rate": 1e-06, "loss": -0.089, "step": 1447 }, { "clip_ratio/high_max": 0.0030088677958701737, "clip_ratio/high_mean": 0.001426322076440556, "clip_ratio/low_mean": 0.001348124345895485, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027744463659473695, "epoch": 3.382618839311753, "grad_norm": 0.4235474169254303, "learning_rate": 1e-06, "loss": -0.0891, "step": 1448 }, { "clip_ratio/high_max": 0.002184545337513555, "clip_ratio/high_mean": 0.0008029766613617539, "clip_ratio/low_mean": 0.0007079693550622324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015109460146049969, "completions/clipped_ratio": 0.1741071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 1263.3070068359375, "completions/mean_terminated_length": 666.1445922851562, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 3.384951881014873, "grad_norm": 0.3509569764137268, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 212675356.0, "reward": 0.5022321939468384, "reward_std": 0.14789676666259766, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 1449 }, { "clip_ratio/high_max": 0.0027263353185844608, "clip_ratio/high_mean": 0.0009901039575197501, "clip_ratio/low_mean": 0.0009751847173902206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019652886549010873, "epoch": 3.3872849227179938, "grad_norm": 0.2886752188205719, "learning_rate": 1e-06, "loss": -0.0518, "step": 1450 }, { "clip_ratio/high_max": 0.0028788219424313866, "clip_ratio/high_mean": 0.0010585665240796516, "clip_ratio/low_mean": 0.0011499497886688914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022085162927396595, "epoch": 3.389617964421114, "grad_norm": 0.2655644714832306, "learning_rate": 1e-06, "loss": -0.0521, "step": 1451 }, { "clip_ratio/high_max": 0.0024294789545820095, "clip_ratio/high_mean": 0.0009403186340932734, "clip_ratio/low_mean": 0.0013969198989798315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002337238511245232, "epoch": 3.3919510061242346, "grad_norm": 0.41138431429862976, "learning_rate": 1e-06, "loss": -0.0521, "step": 1452 }, { "clip_ratio/high_max": 0.003638951056927908, "clip_ratio/high_mean": 0.0010821499108715216, "clip_ratio/low_mean": 0.0006731365037921933, "clip_ratio/low_min": 3.533069684635848e-05, "clip_ratio/region_mean": 0.0017552864010212943, "completions/clipped_ratio": 0.2053571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 1338.818115234375, "completions/mean_terminated_length": 626.2879028320312, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 3.394284047827355, "grad_norm": 0.5266408324241638, "learning_rate": 1e-06, "loss": -0.0852, "num_tokens": 213219121.0, "reward": 0.5301339626312256, "reward_std": 0.1286219209432602, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1453 }, { "clip_ratio/high_max": 0.0034111940694856457, "clip_ratio/high_mean": 0.0011253326847509015, "clip_ratio/low_mean": 0.0011274409289399046, "clip_ratio/low_min": 3.533069684635848e-05, "clip_ratio/region_mean": 0.002252773614600301, "epoch": 3.3966170895304755, "grad_norm": 0.3869584798812866, "learning_rate": 1e-06, "loss": -0.0854, "step": 1454 }, { "clip_ratio/high_max": 0.003642870804469567, "clip_ratio/high_mean": 0.001153471666839323, "clip_ratio/low_mean": 0.0013703641252504895, "clip_ratio/low_min": 3.533069684635848e-05, "clip_ratio/region_mean": 0.0025238358066417277, "epoch": 3.3989501312335957, "grad_norm": 0.25054648518562317, "learning_rate": 1e-06, "loss": -0.0856, "step": 1455 }, { "clip_ratio/high_max": 0.003084875839704182, "clip_ratio/high_mean": 0.0010314236951671774, "clip_ratio/low_mean": 0.0015251716185957775, "clip_ratio/low_min": 5.299604163155891e-05, "clip_ratio/region_mean": 0.002556595398345962, "epoch": 3.4012831729367163, "grad_norm": 0.29132816195487976, "learning_rate": 1e-06, "loss": -0.0857, "step": 1456 }, { "clip_ratio/high_max": 0.0021597528975689784, "clip_ratio/high_mean": 0.0007703071896685287, "clip_ratio/low_mean": 0.0005968487203062978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013671559136128053, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3225.0, "completions/mean_length": 1205.3270263671875, "completions/mean_terminated_length": 656.3678588867188, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 3.4036162146398365, "grad_norm": 0.3055138885974884, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 213798326.0, "reward": 0.5703125, "reward_std": 0.14917626976966858, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1457 }, { "clip_ratio/high_max": 0.0023967510569491424, "clip_ratio/high_mean": 0.000890681307282648, "clip_ratio/low_mean": 0.000815241770396824, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017059230849554297, "epoch": 3.405949256342957, "grad_norm": 0.31193846464157104, "learning_rate": 1e-06, "loss": -0.056, "step": 1458 }, { "clip_ratio/high_max": 0.0029225418911664747, "clip_ratio/high_mean": 0.0009370192456117366, "clip_ratio/low_mean": 0.0010417152334412094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001978734500880819, "epoch": 3.4082822980460774, "grad_norm": 0.2549133002758026, "learning_rate": 1e-06, "loss": -0.0562, "step": 1459 }, { "clip_ratio/high_max": 0.002612090807815548, "clip_ratio/high_mean": 0.0008295100005852873, "clip_ratio/low_mean": 0.001182372754556127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020118827233090997, "epoch": 3.410615339749198, "grad_norm": 0.22129330039024353, "learning_rate": 1e-06, "loss": -0.0562, "step": 1460 }, { "clip_ratio/high_max": 0.0024314190086442977, "clip_ratio/high_mean": 0.0008742830959818093, "clip_ratio/low_mean": 0.0006333228957373649, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015076059935381636, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3286.0, "completions/mean_length": 1304.188720703125, "completions/mean_terminated_length": 683.3629150390625, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 3.4129483814523183, "grad_norm": 0.33513981103897095, "learning_rate": 1e-06, "loss": -0.0537, "num_tokens": 214401231.0, "reward": 0.5267857313156128, "reward_std": 0.15262722969055176, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1461 }, { "clip_ratio/high_max": 0.0030209238320821896, "clip_ratio/high_mean": 0.0010743962702690624, "clip_ratio/low_mean": 0.000757148836783017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018315451088710688, "epoch": 3.415281423155439, "grad_norm": 0.32869017124176025, "learning_rate": 1e-06, "loss": -0.0539, "step": 1462 }, { "clip_ratio/high_max": 0.0027078243874711916, "clip_ratio/high_mean": 0.0010203564634139184, "clip_ratio/low_mean": 0.0009456528478040127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001966009229363408, "epoch": 3.417614464858559, "grad_norm": 0.31274259090423584, "learning_rate": 1e-06, "loss": -0.054, "step": 1463 }, { "clip_ratio/high_max": 0.0029160712656448595, "clip_ratio/high_mean": 0.0009818917023949325, "clip_ratio/low_mean": 0.001110161920223618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002092053597152699, "epoch": 3.41994750656168, "grad_norm": 0.3171895146369934, "learning_rate": 1e-06, "loss": -0.0542, "step": 1464 }, { "clip_ratio/high_max": 0.0025632300530560315, "clip_ratio/high_mean": 0.0010922487563220784, "clip_ratio/low_mean": 0.0004209585167700425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015132072949199937, "completions/clipped_ratio": 0.2287946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 1456.368408203125, "completions/mean_terminated_length": 673.2648315429688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.4222805482648004, "grad_norm": 0.2822405993938446, "learning_rate": 1e-06, "loss": -0.1051, "num_tokens": 214964665.0, "reward": 0.5189732313156128, "reward_std": 0.16979841887950897, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1465 }, { "clip_ratio/high_max": 0.0032525687711313367, "clip_ratio/high_mean": 0.00120888634774019, "clip_ratio/low_mean": 0.0005372365794755751, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001746122958138585, "epoch": 3.4246135899679206, "grad_norm": 0.26969727873802185, "learning_rate": 1e-06, "loss": -0.1052, "step": 1466 }, { "clip_ratio/high_max": 0.0027927601477131248, "clip_ratio/high_mean": 0.0012141498791606864, "clip_ratio/low_mean": 0.0008037213856368908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020178713093628176, "epoch": 3.4269466316710413, "grad_norm": 0.22524547576904297, "learning_rate": 1e-06, "loss": -0.1054, "step": 1467 }, { "clip_ratio/high_max": 0.0033165725326398388, "clip_ratio/high_mean": 0.0013566705092671327, "clip_ratio/low_mean": 0.0008546340759494342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022113045706646517, "epoch": 3.4292796733741615, "grad_norm": 0.2359517365694046, "learning_rate": 1e-06, "loss": -0.1055, "step": 1468 }, { "clip_ratio/high_max": 0.002432909059280064, "clip_ratio/high_mean": 0.0009736033844092162, "clip_ratio/low_mean": 0.0008165398685378022, "clip_ratio/low_min": 3.7224537663860247e-05, "clip_ratio/region_mean": 0.0017901432293001562, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2316.0, "completions/mean_length": 1084.165283203125, "completions/mean_terminated_length": 618.41748046875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 3.431612715077282, "grad_norm": 0.3691757023334503, "learning_rate": 1e-06, "loss": -0.0603, "num_tokens": 215528637.0, "reward": 0.5970982313156128, "reward_std": 0.1787768304347992, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075523018836975, "step": 1469 }, { "clip_ratio/high_max": 0.003240038473450113, "clip_ratio/high_mean": 0.001244753459104686, "clip_ratio/low_mean": 0.0010759066644823179, "clip_ratio/low_min": 2.810251862683799e-05, "clip_ratio/region_mean": 0.002320660096302163, "epoch": 3.4339457567804024, "grad_norm": 0.2718658745288849, "learning_rate": 1e-06, "loss": -0.0606, "step": 1470 }, { "clip_ratio/high_max": 0.003535356947395485, "clip_ratio/high_mean": 0.0012196841926197521, "clip_ratio/low_mean": 0.0012764189232257195, "clip_ratio/low_min": 6.488450890174136e-05, "clip_ratio/region_mean": 0.0024961030794656835, "epoch": 3.436278798483523, "grad_norm": 0.2634088099002838, "learning_rate": 1e-06, "loss": -0.0608, "step": 1471 }, { "clip_ratio/high_max": 0.003507355591864325, "clip_ratio/high_mean": 0.001267346080567222, "clip_ratio/low_mean": 0.0015165622462518513, "clip_ratio/low_min": 9.048136416822672e-05, "clip_ratio/region_mean": 0.002783908297715243, "epoch": 3.4386118401866432, "grad_norm": 0.24711427092552185, "learning_rate": 1e-06, "loss": -0.061, "step": 1472 }, { "clip_ratio/high_max": 0.0026092431653523818, "clip_ratio/high_mean": 0.0009640817843319383, "clip_ratio/low_mean": 0.0006873779857414775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016514597373316064, "completions/clipped_ratio": 0.1729910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3145.0, "completions/mean_length": 1235.1942138671875, "completions/mean_terminated_length": 636.780029296875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 3.440944881889764, "grad_norm": 0.33380481600761414, "learning_rate": 1e-06, "loss": -0.0653, "num_tokens": 216100843.0, "reward": 0.5345982313156128, "reward_std": 0.1698751002550125, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1473 }, { "clip_ratio/high_max": 0.0029111165495123714, "clip_ratio/high_mean": 0.001066847351467004, "clip_ratio/low_mean": 0.0009605682098481338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020274155322113074, "epoch": 3.443277923592884, "grad_norm": 0.3211491107940674, "learning_rate": 1e-06, "loss": -0.0656, "step": 1474 }, { "clip_ratio/high_max": 0.0030401530166273005, "clip_ratio/high_mean": 0.0011105716566817136, "clip_ratio/low_mean": 0.001179943916213233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002290515556524042, "epoch": 3.4456109652960047, "grad_norm": 0.2712908983230591, "learning_rate": 1e-06, "loss": -0.0657, "step": 1475 }, { "clip_ratio/high_max": 0.0028097581598558463, "clip_ratio/high_mean": 0.0010535451365285553, "clip_ratio/low_mean": 0.0012918122920382302, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002345357359445188, "epoch": 3.447944006999125, "grad_norm": 0.35899391770362854, "learning_rate": 1e-06, "loss": -0.0658, "step": 1476 }, { "clip_ratio/high_max": 0.0027615407088887878, "clip_ratio/high_mean": 0.0009777019477041904, "clip_ratio/low_mean": 0.0004897192902717507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014674212216050364, "completions/clipped_ratio": 0.1953125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 1360.4732666015625, "completions/mean_terminated_length": 696.5104370117188, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 3.4502770487022456, "grad_norm": 0.30021557211875916, "learning_rate": 1e-06, "loss": -0.0882, "num_tokens": 216688891.0, "reward": 0.5401785969734192, "reward_std": 0.13301397860050201, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 1477 }, { "clip_ratio/high_max": 0.003418921187403612, "clip_ratio/high_mean": 0.0011423223222664092, "clip_ratio/low_mean": 0.0006081819453811477, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001750504299707245, "epoch": 3.452610090405366, "grad_norm": 0.2633775472640991, "learning_rate": 1e-06, "loss": -0.0884, "step": 1478 }, { "clip_ratio/high_max": 0.002891820127842948, "clip_ratio/high_mean": 0.0011583926898310892, "clip_ratio/low_mean": 0.0007072868293107604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001865679514594376, "epoch": 3.4549431321084865, "grad_norm": 0.25078412890434265, "learning_rate": 1e-06, "loss": -0.0885, "step": 1479 }, { "clip_ratio/high_max": 0.003491372503049206, "clip_ratio/high_mean": 0.0011569949419936165, "clip_ratio/low_mean": 0.0009172863428830169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020742812994285487, "epoch": 3.457276173811607, "grad_norm": 0.21249748766422272, "learning_rate": 1e-06, "loss": -0.0886, "step": 1480 }, { "clip_ratio/high_max": 0.002392310043433099, "clip_ratio/high_mean": 0.0007180312068157946, "clip_ratio/low_mean": 0.0007742367833998287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014922680238669273, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3920.0, "completions/mean_length": 1145.55810546875, "completions/mean_terminated_length": 640.3189697265625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.4596092155147273, "grad_norm": 0.35187554359436035, "learning_rate": 1e-06, "loss": -0.0425, "num_tokens": 217263943.0, "reward": 0.6183035969734192, "reward_std": 0.1450003683567047, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1481 }, { "clip_ratio/high_max": 0.0031581436760461656, "clip_ratio/high_mean": 0.0009571097762091085, "clip_ratio/low_mean": 0.0010297510352756944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019868608069373295, "epoch": 3.4619422572178475, "grad_norm": 0.27443844079971313, "learning_rate": 1e-06, "loss": -0.0428, "step": 1482 }, { "clip_ratio/high_max": 0.002752281063294504, "clip_ratio/high_mean": 0.0008675640815454244, "clip_ratio/low_mean": 0.0012707108498943853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021382749073382, "epoch": 3.464275298920968, "grad_norm": 0.32418301701545715, "learning_rate": 1e-06, "loss": -0.0429, "step": 1483 }, { "clip_ratio/high_max": 0.0028385285258991644, "clip_ratio/high_mean": 0.00083394997909636, "clip_ratio/low_mean": 0.0014543270044669043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002288276948092971, "epoch": 3.466608340624089, "grad_norm": 0.2492295652627945, "learning_rate": 1e-06, "loss": -0.043, "step": 1484 }, { "clip_ratio/high_max": 0.002522495015000459, "clip_ratio/high_mean": 0.0011079816522396868, "clip_ratio/low_mean": 0.0007437406347889919, "clip_ratio/low_min": 3.380270391062368e-05, "clip_ratio/region_mean": 0.0018517222561058588, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2589.0, "completions/mean_length": 1192.966552734375, "completions/mean_terminated_length": 664.4459228515625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.468941382327209, "grad_norm": 0.3722512423992157, "learning_rate": 1e-06, "loss": -0.0637, "num_tokens": 217863585.0, "reward": 0.5680803656578064, "reward_std": 0.19704709947109222, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 1485 }, { "clip_ratio/high_max": 0.0029757852462353185, "clip_ratio/high_mean": 0.0013127740094205365, "clip_ratio/low_mean": 0.001094865772756748, "clip_ratio/low_min": 0.00011356317554600537, "clip_ratio/region_mean": 0.0024076397821772844, "epoch": 3.4712744240303297, "grad_norm": 0.31583505868911743, "learning_rate": 1e-06, "loss": -0.064, "step": 1486 }, { "clip_ratio/high_max": 0.0030220092594390735, "clip_ratio/high_mean": 0.0013473187464114744, "clip_ratio/low_mean": 0.0012209015203552553, "clip_ratio/low_min": 0.0001388404725730652, "clip_ratio/region_mean": 0.00256822032679338, "epoch": 3.47360746573345, "grad_norm": 0.3032079339027405, "learning_rate": 1e-06, "loss": -0.0642, "step": 1487 }, { "clip_ratio/high_max": 0.002851707373338286, "clip_ratio/high_mean": 0.0012149710855737794, "clip_ratio/low_mean": 0.0015116343056433834, "clip_ratio/low_min": 0.00014014556109032128, "clip_ratio/region_mean": 0.0027266053948551416, "epoch": 3.4759405074365706, "grad_norm": 0.2756774127483368, "learning_rate": 1e-06, "loss": -0.0643, "step": 1488 }, { "clip_ratio/high_max": 0.0022743008157704026, "clip_ratio/high_mean": 0.0008011747177079087, "clip_ratio/low_mean": 0.000605744857693935, "clip_ratio/low_min": 1.3760458386968821e-05, "clip_ratio/region_mean": 0.0014069195785850752, "completions/clipped_ratio": 0.2064732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3992.0, "completions/mean_length": 1348.6273193359375, "completions/mean_terminated_length": 633.7693481445312, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 3.478273549139691, "grad_norm": 0.40627923607826233, "learning_rate": 1e-06, "loss": -0.0719, "num_tokens": 218403563.0, "reward": 0.5479910969734192, "reward_std": 0.14673490822315216, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 1489 }, { "clip_ratio/high_max": 0.003021533993887715, "clip_ratio/high_mean": 0.0011017732085747411, "clip_ratio/low_mean": 0.0009259931696306012, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002027766386163421, "epoch": 3.4806065908428114, "grad_norm": 0.3433954119682312, "learning_rate": 1e-06, "loss": -0.0722, "step": 1490 }, { "clip_ratio/high_max": 0.002794168824038934, "clip_ratio/high_mean": 0.0009876603235170478, "clip_ratio/low_mean": 0.0010355198619436123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002023180182732176, "epoch": 3.4829396325459316, "grad_norm": 0.3085142970085144, "learning_rate": 1e-06, "loss": -0.0724, "step": 1491 }, { "clip_ratio/high_max": 0.0029426438704831526, "clip_ratio/high_mean": 0.000971582294369, "clip_ratio/low_mean": 0.0011516471972754516, "clip_ratio/low_min": 2.1566596842603758e-05, "clip_ratio/region_mean": 0.0021232294602668844, "epoch": 3.4852726742490523, "grad_norm": 0.25115662813186646, "learning_rate": 1e-06, "loss": -0.0724, "step": 1492 }, { "clip_ratio/high_max": 0.0029007212397118565, "clip_ratio/high_mean": 0.0010477241830813, "clip_ratio/low_mean": 0.0006721846602886217, "clip_ratio/low_min": 1.7053205738193356e-05, "clip_ratio/region_mean": 0.0017199088288180064, "completions/clipped_ratio": 0.1808035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3476.0, "completions/mean_length": 1296.5703125, "completions/mean_terminated_length": 678.7125244140625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 3.4876057159521725, "grad_norm": 0.3308228552341461, "learning_rate": 1e-06, "loss": -0.083, "num_tokens": 218999322.0, "reward": 0.5234375, "reward_std": 0.17002595961093903, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 1493 }, { "clip_ratio/high_max": 0.0030103138415142894, "clip_ratio/high_mean": 0.0012242803904882749, "clip_ratio/low_mean": 0.0009718649816932157, "clip_ratio/low_min": 1.2447719200281426e-05, "clip_ratio/region_mean": 0.0021961453821859322, "epoch": 3.489938757655293, "grad_norm": 0.3048311471939087, "learning_rate": 1e-06, "loss": -0.0832, "step": 1494 }, { "clip_ratio/high_max": 0.0033319630529149435, "clip_ratio/high_mean": 0.0012709472885035211, "clip_ratio/low_mean": 0.001170280753285624, "clip_ratio/low_min": 2.4895438400562853e-05, "clip_ratio/region_mean": 0.0024412280108663253, "epoch": 3.4922717993584134, "grad_norm": 0.26479119062423706, "learning_rate": 1e-06, "loss": -0.0834, "step": 1495 }, { "clip_ratio/high_max": 0.0031683956112829037, "clip_ratio/high_mean": 0.0011402380714571336, "clip_ratio/low_mean": 0.001393181777530117, "clip_ratio/low_min": 3.734315760084428e-05, "clip_ratio/region_mean": 0.0025334198217024095, "epoch": 3.494604841061534, "grad_norm": 0.25681331753730774, "learning_rate": 1e-06, "loss": -0.0835, "step": 1496 }, { "clip_ratio/high_max": 0.0023501041723648086, "clip_ratio/high_mean": 0.000847990909278451, "clip_ratio/low_mean": 0.00043196730780437065, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00127995821821969, "completions/clipped_ratio": 0.1986607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3779.0, "completions/mean_length": 1313.32373046875, "completions/mean_terminated_length": 623.4679565429688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 3.4969378827646542, "grad_norm": 0.23563139140605927, "learning_rate": 1e-06, "loss": -0.0622, "num_tokens": 219524948.0, "reward": 0.5345982313156128, "reward_std": 0.12831631302833557, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 1497 }, { "clip_ratio/high_max": 0.0026061613607453182, "clip_ratio/high_mean": 0.0009985405467887176, "clip_ratio/low_mean": 0.0006235589321477164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016220994584728032, "epoch": 3.499270924467775, "grad_norm": 0.23240315914154053, "learning_rate": 1e-06, "loss": -0.0623, "step": 1498 }, { "clip_ratio/high_max": 0.002646317963808542, "clip_ratio/high_mean": 0.0009712394812595448, "clip_ratio/low_mean": 0.0006868788418614713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016581182972004171, "epoch": 3.5016039661708955, "grad_norm": 0.2139987349510193, "learning_rate": 1e-06, "loss": -0.0624, "step": 1499 }, { "clip_ratio/high_max": 0.0025528997921355767, "clip_ratio/high_mean": 0.0009007496255435399, "clip_ratio/low_mean": 0.0009149990187324875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018157487211283296, "epoch": 3.5039370078740157, "grad_norm": 0.22334018349647522, "learning_rate": 1e-06, "loss": -0.0625, "step": 1500 }, { "clip_ratio/high_max": 0.0022797048441134393, "clip_ratio/high_mean": 0.0009540160008327803, "clip_ratio/low_mean": 0.0005550076903091394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015090236738615204, "completions/clipped_ratio": 0.1841517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 1354.9676513671875, "completions/mean_terminated_length": 736.2667846679688, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 3.506270049577136, "grad_norm": 0.3129500448703766, "learning_rate": 1e-06, "loss": -0.0965, "num_tokens": 220151575.0, "reward": 0.5792410969734192, "reward_std": 0.17836888134479523, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1501 }, { "clip_ratio/high_max": 0.0026659188079065643, "clip_ratio/high_mean": 0.0010767554540507263, "clip_ratio/low_mean": 0.0008429652934864862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019197206638636999, "epoch": 3.5086030912802566, "grad_norm": 0.2578331232070923, "learning_rate": 1e-06, "loss": -0.0969, "step": 1502 }, { "clip_ratio/high_max": 0.0027675622841343284, "clip_ratio/high_mean": 0.0011148411813337589, "clip_ratio/low_mean": 0.0009087679827644024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020236091877450235, "epoch": 3.5109361329833773, "grad_norm": 0.24845904111862183, "learning_rate": 1e-06, "loss": -0.097, "step": 1503 }, { "clip_ratio/high_max": 0.002540670393500477, "clip_ratio/high_mean": 0.001044667356836726, "clip_ratio/low_mean": 0.0011574263771763071, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002202093724918086, "epoch": 3.5132691746864975, "grad_norm": 0.292165607213974, "learning_rate": 1e-06, "loss": -0.0971, "step": 1504 }, { "clip_ratio/high_max": 0.0026543206186033785, "clip_ratio/high_mean": 0.000993739169643959, "clip_ratio/low_mean": 0.0007413307284878101, "clip_ratio/low_min": 4.1377028537681326e-05, "clip_ratio/region_mean": 0.001735069883579854, "completions/clipped_ratio": 0.1964285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 1401.6429443359375, "completions/mean_terminated_length": 743.022216796875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.515602216389618, "grad_norm": 0.35746651887893677, "learning_rate": 1e-06, "loss": -0.0986, "num_tokens": 220782847.0, "reward": 0.5323660969734192, "reward_std": 0.15777993202209473, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 1505 }, { "clip_ratio/high_max": 0.0028942258359165862, "clip_ratio/high_mean": 0.0011398786791687598, "clip_ratio/low_mean": 0.0008631971413706196, "clip_ratio/low_min": 1.7467858924646862e-05, "clip_ratio/region_mean": 0.0020030757877975702, "epoch": 3.5179352580927383, "grad_norm": 0.3008044958114624, "learning_rate": 1e-06, "loss": -0.0988, "step": 1506 }, { "clip_ratio/high_max": 0.003281142056948738, "clip_ratio/high_mean": 0.0011600244151850347, "clip_ratio/low_mean": 0.0010390899451522273, "clip_ratio/low_min": 1.840942604758311e-05, "clip_ratio/region_mean": 0.002199114329414442, "epoch": 3.520268299795859, "grad_norm": 0.2596571147441864, "learning_rate": 1e-06, "loss": -0.099, "step": 1507 }, { "clip_ratio/high_max": 0.002689667609956814, "clip_ratio/high_mean": 0.0010504342744752648, "clip_ratio/low_mean": 0.0013327549550012918, "clip_ratio/low_min": 3.4935717849293724e-05, "clip_ratio/region_mean": 0.0023831892031012103, "epoch": 3.522601341498979, "grad_norm": 0.21419784426689148, "learning_rate": 1e-06, "loss": -0.0991, "step": 1508 }, { "clip_ratio/high_max": 0.002025470144872088, "clip_ratio/high_mean": 0.000934464811507496, "clip_ratio/low_mean": 0.0007760484650134458, "clip_ratio/low_min": 1.0796337846841197e-05, "clip_ratio/region_mean": 0.0017105132646975107, "completions/clipped_ratio": 0.1640625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 1251.680908203125, "completions/mean_terminated_length": 693.449951171875, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 3.5249343832021, "grad_norm": 0.3148497939109802, "learning_rate": 1e-06, "loss": -0.0672, "num_tokens": 221397097.0, "reward": 0.566964328289032, "reward_std": 0.1840411275625229, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 1509 }, { "clip_ratio/high_max": 0.002676955111382995, "clip_ratio/high_mean": 0.0011807695045717992, "clip_ratio/low_mean": 0.0010588440036372049, "clip_ratio/low_min": 1.955977495526895e-05, "clip_ratio/region_mean": 0.002239613422716502, "epoch": 3.52726742490522, "grad_norm": 0.3090699017047882, "learning_rate": 1e-06, "loss": -0.0675, "step": 1510 }, { "clip_ratio/high_max": 0.0025940429040929303, "clip_ratio/high_mean": 0.001103127473470522, "clip_ratio/low_mean": 0.0012223327503306791, "clip_ratio/low_min": 1.955977495526895e-05, "clip_ratio/region_mean": 0.0023254601765074767, "epoch": 3.5296004666083407, "grad_norm": 0.2467300146818161, "learning_rate": 1e-06, "loss": -0.0676, "step": 1511 }, { "clip_ratio/high_max": 0.002437049952277448, "clip_ratio/high_mean": 0.0010338622214476345, "clip_ratio/low_mean": 0.0014347818942042068, "clip_ratio/low_min": 1.3039849363849498e-05, "clip_ratio/region_mean": 0.0024686441174708307, "epoch": 3.531933508311461, "grad_norm": 0.2955998182296753, "learning_rate": 1e-06, "loss": -0.0677, "step": 1512 }, { "clip_ratio/high_max": 0.0023475160778616555, "clip_ratio/high_mean": 0.000987039549727342, "clip_ratio/low_mean": 0.000704028707332327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016910682679736055, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3899.0, "completions/mean_length": 1262.3326416015625, "completions/mean_terminated_length": 678.8182983398438, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 3.5342665500145816, "grad_norm": 0.35950562357902527, "learning_rate": 1e-06, "loss": -0.1186, "num_tokens": 221986099.0, "reward": 0.598214328289032, "reward_std": 0.16904954612255096, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 1513 }, { "clip_ratio/high_max": 0.0035973012636532076, "clip_ratio/high_mean": 0.001298691571719246, "clip_ratio/low_mean": 0.0008401642062381143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021388557652244344, "epoch": 3.536599591717702, "grad_norm": 0.27253785729408264, "learning_rate": 1e-06, "loss": -0.1188, "step": 1514 }, { "clip_ratio/high_max": 0.0032188785189646296, "clip_ratio/high_mean": 0.0012338145006651757, "clip_ratio/low_mean": 0.0010743178299890133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002308132352482062, "epoch": 3.5389326334208224, "grad_norm": 0.22632579505443573, "learning_rate": 1e-06, "loss": -0.119, "step": 1515 }, { "clip_ratio/high_max": 0.00277203215227928, "clip_ratio/high_mean": 0.0011150104546686634, "clip_ratio/low_mean": 0.0012494994443841279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023645098917768337, "epoch": 3.5412656751239426, "grad_norm": 0.233441561460495, "learning_rate": 1e-06, "loss": -0.119, "step": 1516 }, { "clip_ratio/high_max": 0.0033738151541911066, "clip_ratio/high_mean": 0.0010480080181878293, "clip_ratio/low_mean": 0.000549867203062604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015978752744558733, "completions/clipped_ratio": 0.1964285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3058.0, "completions/mean_length": 1312.8984375, "completions/mean_terminated_length": 632.584716796875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 3.5435987168270633, "grad_norm": 0.3846149146556854, "learning_rate": 1e-06, "loss": -0.0588, "num_tokens": 222541640.0, "reward": 0.53125, "reward_std": 0.13775329291820526, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 1517 }, { "clip_ratio/high_max": 0.003223258572688792, "clip_ratio/high_mean": 0.0011083738590969006, "clip_ratio/low_mean": 0.0009958546870620921, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021042284861323424, "epoch": 3.545931758530184, "grad_norm": 0.3291001319885254, "learning_rate": 1e-06, "loss": -0.059, "step": 1518 }, { "clip_ratio/high_max": 0.0034833777172025293, "clip_ratio/high_mean": 0.0011976627065450884, "clip_ratio/low_mean": 0.001046523768309271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022441865148721263, "epoch": 3.548264800233304, "grad_norm": 0.4383046329021454, "learning_rate": 1e-06, "loss": -0.0591, "step": 1519 }, { "clip_ratio/high_max": 0.0036275723250582814, "clip_ratio/high_mean": 0.0011557021680346224, "clip_ratio/low_mean": 0.0012577502566273324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002413452424661955, "epoch": 3.5505978419364244, "grad_norm": 0.2779461145401001, "learning_rate": 1e-06, "loss": -0.0592, "step": 1520 }, { "clip_ratio/high_max": 0.0026107412777491845, "clip_ratio/high_mean": 0.0011020833007933106, "clip_ratio/low_mean": 0.0007288959131983574, "clip_ratio/low_min": 1.5644554878235795e-05, "clip_ratio/region_mean": 0.00183097917761188, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3292.0, "completions/mean_length": 1295.583740234375, "completions/mean_terminated_length": 672.844482421875, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 3.552930883639545, "grad_norm": 0.35715851187705994, "learning_rate": 1e-06, "loss": -0.1101, "num_tokens": 223118067.0, "reward": 0.5714285969734192, "reward_std": 0.20850077271461487, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1521 }, { "clip_ratio/high_max": 0.0031851034073042683, "clip_ratio/high_mean": 0.0013549956274800934, "clip_ratio/low_mean": 0.0009374552846566075, "clip_ratio/low_min": 3.128910975647159e-05, "clip_ratio/region_mean": 0.0022924509103177115, "epoch": 3.5552639253426657, "grad_norm": 0.30691832304000854, "learning_rate": 1e-06, "loss": -0.1104, "step": 1522 }, { "clip_ratio/high_max": 0.003452163393376395, "clip_ratio/high_mean": 0.0013758698623860255, "clip_ratio/low_mean": 0.0010093992796100792, "clip_ratio/low_min": 1.7370761270285584e-05, "clip_ratio/region_mean": 0.002385269144724589, "epoch": 3.557596967045786, "grad_norm": 0.2864389717578888, "learning_rate": 1e-06, "loss": -0.1106, "step": 1523 }, { "clip_ratio/high_max": 0.003090730773692485, "clip_ratio/high_mean": 0.0013269843148009386, "clip_ratio/low_mean": 0.0013503560730896425, "clip_ratio/low_min": 3.098661545664072e-05, "clip_ratio/region_mean": 0.002677340409718454, "epoch": 3.5599300087489065, "grad_norm": 0.25327908992767334, "learning_rate": 1e-06, "loss": -0.1107, "step": 1524 }, { "clip_ratio/high_max": 0.0025775858885026537, "clip_ratio/high_mean": 0.000957845215452835, "clip_ratio/low_mean": 0.0007984565493188711, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017563017754582688, "completions/clipped_ratio": 0.1685267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3018.0, "completions/mean_length": 1227.01904296875, "completions/mean_terminated_length": 645.5208129882812, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 3.5622630504520267, "grad_norm": 0.4949190318584442, "learning_rate": 1e-06, "loss": -0.0815, "num_tokens": 223689812.0, "reward": 0.5959821939468384, "reward_std": 0.1830640286207199, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 1525 }, { "clip_ratio/high_max": 0.0029955473728477955, "clip_ratio/high_mean": 0.0011499087486299686, "clip_ratio/low_mean": 0.0010720030668380787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022219118181965314, "epoch": 3.5645960921551474, "grad_norm": 0.28687843680381775, "learning_rate": 1e-06, "loss": -0.0817, "step": 1526 }, { "clip_ratio/high_max": 0.0034787955810315907, "clip_ratio/high_mean": 0.0011736647647921927, "clip_ratio/low_mean": 0.0011168585679115495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002290523327246774, "epoch": 3.5669291338582676, "grad_norm": 0.25637078285217285, "learning_rate": 1e-06, "loss": -0.0819, "step": 1527 }, { "clip_ratio/high_max": 0.00323119483073242, "clip_ratio/high_mean": 0.0011336969037074596, "clip_ratio/low_mean": 0.0014666480810774374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002600344938400667, "epoch": 3.5692621755613883, "grad_norm": 0.32233622670173645, "learning_rate": 1e-06, "loss": -0.0821, "step": 1528 }, { "clip_ratio/high_max": 0.002661401405930519, "clip_ratio/high_mean": 0.0010299417026544688, "clip_ratio/low_mean": 0.0004914766668662196, "clip_ratio/low_min": 1.4328289580589626e-05, "clip_ratio/region_mean": 0.0015214183513307944, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3200.0, "completions/mean_length": 1296.469970703125, "completions/mean_terminated_length": 673.9276733398438, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 3.5715952172645085, "grad_norm": 0.3157018721103668, "learning_rate": 1e-06, "loss": -0.0676, "num_tokens": 224279809.0, "reward": 0.5491071939468384, "reward_std": 0.16270402073860168, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 1529 }, { "clip_ratio/high_max": 0.0030244411318562925, "clip_ratio/high_mean": 0.0011270798877376365, "clip_ratio/low_mean": 0.0007608474184053193, "clip_ratio/low_min": 1.735147088766098e-05, "clip_ratio/region_mean": 0.0018879273447964806, "epoch": 3.573928258967629, "grad_norm": 0.28625938296318054, "learning_rate": 1e-06, "loss": -0.0679, "step": 1530 }, { "clip_ratio/high_max": 0.0030160953974700533, "clip_ratio/high_mean": 0.0011641476585282362, "clip_ratio/low_mean": 0.0009189475094899535, "clip_ratio/low_min": 3.470294177532196e-05, "clip_ratio/region_mean": 0.0020830951689276844, "epoch": 3.5762613006707493, "grad_norm": 0.23650334775447845, "learning_rate": 1e-06, "loss": -0.068, "step": 1531 }, { "clip_ratio/high_max": 0.0031486430452787317, "clip_ratio/high_mean": 0.0011302901166345691, "clip_ratio/low_mean": 0.0010223674125882098, "clip_ratio/low_min": 8.596973930252716e-05, "clip_ratio/region_mean": 0.002152657529222779, "epoch": 3.57859434237387, "grad_norm": 0.21814867854118347, "learning_rate": 1e-06, "loss": -0.0681, "step": 1532 }, { "clip_ratio/high_max": 0.0026995788648491725, "clip_ratio/high_mean": 0.0010108518345077755, "clip_ratio/low_mean": 0.0006342021315504098, "clip_ratio/low_min": 2.504006442904938e-05, "clip_ratio/region_mean": 0.0016450539478682913, "completions/clipped_ratio": 0.1830357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 1274.7288818359375, "completions/mean_terminated_length": 642.6406860351562, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.5809273840769906, "grad_norm": 0.42358139157295227, "learning_rate": 1e-06, "loss": -0.0879, "num_tokens": 224843742.0, "reward": 0.5881696939468384, "reward_std": 0.16555650532245636, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 1533 }, { "clip_ratio/high_max": 0.0031136035977397114, "clip_ratio/high_mean": 0.0012501615965447854, "clip_ratio/low_mean": 0.0009599393051757943, "clip_ratio/low_min": 1.252003221452469e-05, "clip_ratio/region_mean": 0.0022101009089965373, "epoch": 3.583260425780111, "grad_norm": 0.31278711557388306, "learning_rate": 1e-06, "loss": -0.0883, "step": 1534 }, { "clip_ratio/high_max": 0.0035869555504177697, "clip_ratio/high_mean": 0.001279306146898307, "clip_ratio/low_mean": 0.0011774325430451427, "clip_ratio/low_min": 5.008012885809876e-05, "clip_ratio/region_mean": 0.0024567386863054708, "epoch": 3.585593467483231, "grad_norm": 0.26393356919288635, "learning_rate": 1e-06, "loss": -0.0886, "step": 1535 }, { "clip_ratio/high_max": 0.00319365114410175, "clip_ratio/high_mean": 0.0011895089846802875, "clip_ratio/low_mean": 0.0013025746593484655, "clip_ratio/low_min": 3.052503234357573e-05, "clip_ratio/region_mean": 0.0024920836149249226, "epoch": 3.5879265091863517, "grad_norm": 0.27731215953826904, "learning_rate": 1e-06, "loss": -0.0885, "step": 1536 }, { "clip_ratio/high_max": 0.003128183823719155, "clip_ratio/high_mean": 0.0011135051026940346, "clip_ratio/low_mean": 0.0005207825106481323, "clip_ratio/low_min": 1.7472742911195382e-05, "clip_ratio/region_mean": 0.0016342875969712622, "completions/clipped_ratio": 0.1819196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3815.0, "completions/mean_length": 1286.3895263671875, "completions/mean_terminated_length": 661.605712890625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 3.5902595508894724, "grad_norm": 0.3832108676433563, "learning_rate": 1e-06, "loss": -0.0734, "num_tokens": 225421971.0, "reward": 0.5301339626312256, "reward_std": 0.17659901082515717, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1537 }, { "clip_ratio/high_max": 0.003468630740826484, "clip_ratio/high_mean": 0.0012430049791873898, "clip_ratio/low_mean": 0.0006966114424358238, "clip_ratio/low_min": 2.518130531825591e-05, "clip_ratio/region_mean": 0.0019396164425415918, "epoch": 3.5925925925925926, "grad_norm": 0.28782737255096436, "learning_rate": 1e-06, "loss": -0.0736, "step": 1538 }, { "clip_ratio/high_max": 0.00373454752843827, "clip_ratio/high_mean": 0.0013299306010594591, "clip_ratio/low_mean": 0.000834752927403315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0021646835084538907, "epoch": 3.5949256342957128, "grad_norm": 0.2644355893135071, "learning_rate": 1e-06, "loss": -0.0738, "step": 1539 }, { "clip_ratio/high_max": 0.0035358406603336334, "clip_ratio/high_mean": 0.0012311937171034515, "clip_ratio/low_mean": 0.0010186804520344594, "clip_ratio/low_min": 4.148689185967669e-05, "clip_ratio/region_mean": 0.002249874160042964, "epoch": 3.5972586759988334, "grad_norm": 0.29489627480506897, "learning_rate": 1e-06, "loss": -0.0739, "step": 1540 }, { "clip_ratio/high_max": 0.003314832487376407, "clip_ratio/high_mean": 0.0010940357988147298, "clip_ratio/low_mean": 0.0006898802121213521, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017839160209405236, "completions/clipped_ratio": 0.2299107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2849.0, "completions/mean_length": 1440.69091796875, "completions/mean_terminated_length": 647.9464111328125, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 3.599591717701954, "grad_norm": 0.39426520466804504, "learning_rate": 1e-06, "loss": -0.1001, "num_tokens": 225952342.0, "reward": 0.5412946939468384, "reward_std": 0.1713072657585144, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 1541 }, { "clip_ratio/high_max": 0.0036451994528761134, "clip_ratio/high_mean": 0.0013332387898117304, "clip_ratio/low_mean": 0.0009492857570876367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002282524590555113, "epoch": 3.6019247594050743, "grad_norm": 0.46961215138435364, "learning_rate": 1e-06, "loss": -0.1004, "step": 1542 }, { "clip_ratio/high_max": 0.003487508642137982, "clip_ratio/high_mean": 0.0013010401034989627, "clip_ratio/low_mean": 0.0011625834558799397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002463623481162358, "epoch": 3.604257801108195, "grad_norm": 0.26113542914390564, "learning_rate": 1e-06, "loss": -0.1006, "step": 1543 }, { "clip_ratio/high_max": 0.003831864560197573, "clip_ratio/high_mean": 0.0012798725147149526, "clip_ratio/low_mean": 0.0014721531297254842, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0027520255971467122, "epoch": 3.606590842811315, "grad_norm": 0.2875174283981323, "learning_rate": 1e-06, "loss": -0.1007, "step": 1544 }, { "clip_ratio/high_max": 0.003023206598300021, "clip_ratio/high_mean": 0.001271988690859871, "clip_ratio/low_mean": 0.0006859922905277926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019579810250434093, "completions/clipped_ratio": 0.2198660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2363.0, "completions/mean_length": 1441.9923095703125, "completions/mean_terminated_length": 694.010009765625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.608923884514436, "grad_norm": 0.4443901777267456, "learning_rate": 1e-06, "loss": -0.1147, "num_tokens": 226527839.0, "reward": 0.5691964626312256, "reward_std": 0.20523346960544586, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1545 }, { "clip_ratio/high_max": 0.0034600492435856722, "clip_ratio/high_mean": 0.0014443448417296167, "clip_ratio/low_mean": 0.0009484658476139884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023928107184474356, "epoch": 3.611256926217556, "grad_norm": 0.27703621983528137, "learning_rate": 1e-06, "loss": -0.115, "step": 1546 }, { "clip_ratio/high_max": 0.003560012446541805, "clip_ratio/high_mean": 0.0013946929029771127, "clip_ratio/low_mean": 0.0011226687602174934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0025173616813845, "epoch": 3.6135899679206767, "grad_norm": 0.32288092374801636, "learning_rate": 1e-06, "loss": -0.1152, "step": 1547 }, { "clip_ratio/high_max": 0.003413889651710633, "clip_ratio/high_mean": 0.0014856893867545296, "clip_ratio/low_mean": 0.0013747827833867632, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002860472144675441, "epoch": 3.615923009623797, "grad_norm": 0.30773279070854187, "learning_rate": 1e-06, "loss": -0.1154, "step": 1548 }, { "clip_ratio/high_max": 0.0021399538090918213, "clip_ratio/high_mean": 0.0008620997614343651, "clip_ratio/low_mean": 0.0005568014967138879, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014189012617862318, "completions/clipped_ratio": 0.2477678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2899.0, "completions/mean_length": 1520.0301513671875, "completions/mean_terminated_length": 671.5653076171875, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 3.6182560513269175, "grad_norm": 0.38228416442871094, "learning_rate": 1e-06, "loss": -0.0771, "num_tokens": 227069810.0, "reward": 0.5189732313156128, "reward_std": 0.17058978974819183, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 1549 }, { "clip_ratio/high_max": 0.0030548020695277955, "clip_ratio/high_mean": 0.001068041374310269, "clip_ratio/low_mean": 0.0009025194267451297, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001970560821064282, "epoch": 3.6205890930300377, "grad_norm": 0.32157832384109497, "learning_rate": 1e-06, "loss": -0.0774, "step": 1550 }, { "clip_ratio/high_max": 0.002751321553660091, "clip_ratio/high_mean": 0.0010448119501234032, "clip_ratio/low_mean": 0.001041971154336352, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020867831117357127, "epoch": 3.6229221347331584, "grad_norm": 0.24815063178539276, "learning_rate": 1e-06, "loss": -0.0776, "step": 1551 }, { "clip_ratio/high_max": 0.0026036685085273348, "clip_ratio/high_mean": 0.0009824264798226068, "clip_ratio/low_mean": 0.0012956580212630797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022780845465604216, "epoch": 3.625255176436279, "grad_norm": 0.3354860544204712, "learning_rate": 1e-06, "loss": -0.0776, "step": 1552 }, { "clip_ratio/high_max": 0.0030948232670198195, "clip_ratio/high_mean": 0.001258082458662102, "clip_ratio/low_mean": 0.0007376344910881016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019957169497502036, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2397.0, "completions/mean_length": 1174.458740234375, "completions/mean_terminated_length": 591.71484375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 3.6275882181393992, "grad_norm": 0.37742355465888977, "learning_rate": 1e-06, "loss": -0.0925, "num_tokens": 227601141.0, "reward": 0.606026828289032, "reward_std": 0.15725818276405334, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1553 }, { "clip_ratio/high_max": 0.003571727851522155, "clip_ratio/high_mean": 0.0013547524649766274, "clip_ratio/low_mean": 0.0009574865052854875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002312238997546956, "epoch": 3.6299212598425195, "grad_norm": 0.364230751991272, "learning_rate": 1e-06, "loss": -0.0927, "step": 1554 }, { "clip_ratio/high_max": 0.003034077410120517, "clip_ratio/high_mean": 0.0012617486208910123, "clip_ratio/low_mean": 0.0011363514995537116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0023981001213542186, "epoch": 3.63225430154564, "grad_norm": 0.3032190203666687, "learning_rate": 1e-06, "loss": -0.093, "step": 1555 }, { "clip_ratio/high_max": 0.0028370428626658395, "clip_ratio/high_mean": 0.001281575325265294, "clip_ratio/low_mean": 0.0013283705375215504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002609945928270463, "epoch": 3.6345873432487608, "grad_norm": 0.27822694182395935, "learning_rate": 1e-06, "loss": -0.0931, "step": 1556 }, { "clip_ratio/high_max": 0.002680474444787251, "clip_ratio/high_mean": 0.0010216867940471275, "clip_ratio/low_mean": 0.0005964219117231551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016181086975848302, "completions/clipped_ratio": 0.2142857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3919.0, "completions/mean_length": 1436.2857666015625, "completions/mean_terminated_length": 710.9091186523438, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.636920384951881, "grad_norm": 0.3404932916164398, "learning_rate": 1e-06, "loss": -0.0837, "num_tokens": 228181853.0, "reward": 0.4988839626312256, "reward_std": 0.18201345205307007, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 1557 }, { "clip_ratio/high_max": 0.0031010887505544815, "clip_ratio/high_mean": 0.00122041084432567, "clip_ratio/low_mean": 0.0008648375960547128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002085248452203814, "epoch": 3.6392534266550016, "grad_norm": 0.31997114419937134, "learning_rate": 1e-06, "loss": -0.0839, "step": 1558 }, { "clip_ratio/high_max": 0.003221661929273978, "clip_ratio/high_mean": 0.001245005041710101, "clip_ratio/low_mean": 0.001028126946039265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0022731319768354297, "epoch": 3.641586468358122, "grad_norm": 0.23597189784049988, "learning_rate": 1e-06, "loss": -0.0841, "step": 1559 }, { "clip_ratio/high_max": 0.0029438552883220837, "clip_ratio/high_mean": 0.0011887395639860188, "clip_ratio/low_mean": 0.0012269534418010153, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002415692993963603, "epoch": 3.6439195100612425, "grad_norm": 0.2510083317756653, "learning_rate": 1e-06, "loss": -0.0842, "step": 1560 }, { "clip_ratio/high_max": 0.0028251345502212644, "clip_ratio/high_mean": 0.0012001818868156988, "clip_ratio/low_mean": 0.0006806998262618436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018808816967066377, "completions/clipped_ratio": 0.2142857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 1424.5101318359375, "completions/mean_terminated_length": 695.921875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.6462525517643627, "grad_norm": 0.3832797408103943, "learning_rate": 1e-06, "loss": -0.1433, "num_tokens": 228758326.0, "reward": 0.574776828289032, "reward_std": 0.2134273499250412, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1561 }, { "clip_ratio/high_max": 0.0038279997825156897, "clip_ratio/high_mean": 0.001497450426541036, "clip_ratio/low_mean": 0.0009243143595085712, "clip_ratio/low_min": 1.1888910194102209e-05, "clip_ratio/region_mean": 0.0024217647878685966, "epoch": 3.6485855934674833, "grad_norm": 0.33032625913619995, "learning_rate": 1e-06, "loss": -0.1436, "step": 1562 }, { "clip_ratio/high_max": 0.0033771845774026588, "clip_ratio/high_mean": 0.001481091407185886, "clip_ratio/low_mean": 0.001131790140789235, "clip_ratio/low_min": 7.760118933219928e-06, "clip_ratio/region_mean": 0.0026128815370611846, "epoch": 3.6509186351706036, "grad_norm": 0.41828563809394836, "learning_rate": 1e-06, "loss": -0.1437, "step": 1563 }, { "clip_ratio/high_max": 0.003442059605731629, "clip_ratio/high_mean": 0.0013597105171356816, "clip_ratio/low_mean": 0.0012348913332971279, "clip_ratio/low_min": 2.3280357709154487e-05, "clip_ratio/region_mean": 0.002594601915916428, "epoch": 3.653251676873724, "grad_norm": 0.2683136463165283, "learning_rate": 1e-06, "loss": -0.144, "step": 1564 }, { "clip_ratio/high_max": 0.003076203509408515, "clip_ratio/high_mean": 0.001119939750424237, "clip_ratio/low_mean": 0.0006350224448397057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001754962228005752, "completions/clipped_ratio": 0.2433035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3473.0, "completions/mean_length": 1496.946533203125, "completions/mean_terminated_length": 661.2625122070312, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 3.6555847185768444, "grad_norm": 0.3296455144882202, "learning_rate": 1e-06, "loss": -0.069, "num_tokens": 229305646.0, "reward": 0.4843750298023224, "reward_std": 0.14563976228237152, "rewards/verify_math_reward/mean": 0.484375, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 1565 }, { "clip_ratio/high_max": 0.00340685666014906, "clip_ratio/high_mean": 0.0012808764458895894, "clip_ratio/low_mean": 0.0007630479349245434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020439243817236274, "epoch": 3.657917760279965, "grad_norm": 0.3247814178466797, "learning_rate": 1e-06, "loss": -0.0693, "step": 1566 }, { "clip_ratio/high_max": 0.004076259312569164, "clip_ratio/high_mean": 0.001424640493496554, "clip_ratio/low_mean": 0.0010457216612849152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002470362167514395, "epoch": 3.6602508019830857, "grad_norm": 0.27823442220687866, "learning_rate": 1e-06, "loss": -0.0695, "step": 1567 }, { "clip_ratio/high_max": 0.003177139748004265, "clip_ratio/high_mean": 0.00116790375977871, "clip_ratio/low_mean": 0.0011514383477333467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002319342085684184, "epoch": 3.662583843686206, "grad_norm": 0.2684524953365326, "learning_rate": 1e-06, "loss": -0.0695, "step": 1568 }, { "clip_ratio/high_max": 0.002009153824474197, "clip_ratio/high_mean": 0.0007501048912672559, "clip_ratio/low_mean": 0.00029923092245098815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010493357949599158, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3410.0, "completions/mean_length": 1177.0045166015625, "completions/mean_terminated_length": 663.6903076171875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 3.664916885389326, "grad_norm": 0.24503754079341888, "learning_rate": 1e-06, "loss": -0.0425, "num_tokens": 229906562.0, "reward": 0.6584821939468384, "reward_std": 0.12223109602928162, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1569 }, { "clip_ratio/high_max": 0.00233134716108907, "clip_ratio/high_mean": 0.00090423029178055, "clip_ratio/low_mean": 0.0003734052588697523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012776355688401964, "epoch": 3.667249927092447, "grad_norm": 0.3895801603794098, "learning_rate": 1e-06, "loss": -0.0425, "step": 1570 }, { "clip_ratio/high_max": 0.002474146938766353, "clip_ratio/high_mean": 0.0009299981356889475, "clip_ratio/low_mean": 0.0004818480730364172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001411846220435109, "epoch": 3.6695829687955674, "grad_norm": 0.21874651312828064, "learning_rate": 1e-06, "loss": -0.0427, "step": 1571 }, { "clip_ratio/high_max": 0.002170715044485405, "clip_ratio/high_mean": 0.0007921155311123584, "clip_ratio/low_mean": 0.0005451086276480055, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013372241410252172, "epoch": 3.6719160104986877, "grad_norm": 0.21428361535072327, "learning_rate": 1e-06, "loss": -0.0427, "step": 1572 }, { "clip_ratio/high_max": 0.0026036849012598395, "clip_ratio/high_mean": 0.0010126106444658944, "clip_ratio/low_mean": 0.0006757626333637745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016883732459973544, "completions/clipped_ratio": 0.1785714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 1322.5179443359375, "completions/mean_terminated_length": 719.5869750976562, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 3.674249052201808, "grad_norm": 0.3311329185962677, "learning_rate": 1e-06, "loss": -0.0896, "num_tokens": 230528706.0, "reward": 0.5580357313156128, "reward_std": 0.1582677811384201, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 1573 }, { "clip_ratio/high_max": 0.003360018708917778, "clip_ratio/high_mean": 0.0012589432990353089, "clip_ratio/low_mean": 0.0008136436954373494, "clip_ratio/low_min": 1.4357913642015774e-05, "clip_ratio/region_mean": 0.0020725869471789338, "epoch": 3.6765820939049285, "grad_norm": 0.2822306156158447, "learning_rate": 1e-06, "loss": -0.0898, "step": 1574 }, { "clip_ratio/high_max": 0.0033367287542205304, "clip_ratio/high_mean": 0.0012043147580698133, "clip_ratio/low_mean": 0.0009804068076846306, "clip_ratio/low_min": 2.8715827284031548e-05, "clip_ratio/region_mean": 0.002184721546655055, "epoch": 3.678915135608049, "grad_norm": 0.24652273952960968, "learning_rate": 1e-06, "loss": -0.09, "step": 1575 }, { "clip_ratio/high_max": 0.0031179989455267787, "clip_ratio/high_mean": 0.0011686939578794409, "clip_ratio/low_mean": 0.001148701980127953, "clip_ratio/low_min": 1.4357913642015774e-05, "clip_ratio/region_mean": 0.002317395934369415, "epoch": 3.6812481773111694, "grad_norm": 0.22814157605171204, "learning_rate": 1e-06, "loss": -0.0901, "step": 1576 }, { "clip_ratio/high_max": 0.0022585531005461235, "clip_ratio/high_mean": 0.0008721222602616763, "clip_ratio/low_mean": 0.0006358251857818686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015079474505910184, "completions/clipped_ratio": 0.1975446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4028.0, "completions/mean_length": 1368.5313720703125, "completions/mean_terminated_length": 697.095947265625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 3.68358121901429, "grad_norm": 0.32418620586395264, "learning_rate": 1e-06, "loss": -0.0899, "num_tokens": 231114142.0, "reward": 0.520089328289032, "reward_std": 0.16070912778377533, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 1577 }, { "clip_ratio/high_max": 0.0029885207768529654, "clip_ratio/high_mean": 0.0011393665590730961, "clip_ratio/low_mean": 0.0008626439630461391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002002010471187532, "epoch": 3.6859142607174102, "grad_norm": 0.2751808166503906, "learning_rate": 1e-06, "loss": -0.09, "step": 1578 }, { "clip_ratio/high_max": 0.003105989428149769, "clip_ratio/high_mean": 0.0011380136656953255, "clip_ratio/low_mean": 0.0009521870260869036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0020902006945107132, "epoch": 3.688247302420531, "grad_norm": 0.24647995829582214, "learning_rate": 1e-06, "loss": -0.0902, "step": 1579 }, { "clip_ratio/high_max": 0.0028495530496002175, "clip_ratio/high_mean": 0.0010594399755063932, "clip_ratio/low_mean": 0.0010839170718099922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002143357072782237, "epoch": 3.690580344123651, "grad_norm": 0.23350109159946442, "learning_rate": 1e-06, "loss": -0.0903, "step": 1580 }, { "clip_ratio/high_max": 0.0023942124462337233, "clip_ratio/high_mean": 0.0007833131221559597, "clip_ratio/low_mean": 0.000566296829674684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013496099709300324, "completions/clipped_ratio": 0.1975446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3932.0, "completions/mean_length": 1337.771240234375, "completions/mean_terminated_length": 658.7635498046875, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 3.6929133858267718, "grad_norm": 0.3115820586681366, "learning_rate": 1e-06, "loss": -0.0788, "num_tokens": 231676737.0, "reward": 0.5758928656578064, "reward_std": 0.1375589668750763, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1581 }, { "clip_ratio/high_max": 0.0033269945488427766, "clip_ratio/high_mean": 0.001019602606902481, "clip_ratio/low_mean": 0.0007508156904805219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00177041827555513, "epoch": 3.695246427529892, "grad_norm": 0.2711862623691559, "learning_rate": 1e-06, "loss": -0.079, "step": 1582 }, { "clip_ratio/high_max": 0.003074318909057183, "clip_ratio/high_mean": 0.0009527142501610797, "clip_ratio/low_mean": 0.0008890186472854111, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001841732933826279, "epoch": 3.6975794692330126, "grad_norm": 0.3059765100479126, "learning_rate": 1e-06, "loss": -0.0791, "step": 1583 }, { "clip_ratio/high_max": 0.0028426043718354777, "clip_ratio/high_mean": 0.0008659506893309299, "clip_ratio/low_mean": 0.0010804437115439214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0019463943535811268, "epoch": 3.699912510936133, "grad_norm": 0.24489635229110718, "learning_rate": 1e-06, "loss": -0.0793, "step": 1584 }, { "clip_ratio/high_max": 0.002322709835425485, "clip_ratio/high_mean": 0.0008757356263231486, "clip_ratio/low_mean": 0.0006089560674809036, "clip_ratio/low_min": 1.4637002095696516e-05, "clip_ratio/region_mean": 0.001484691645600833, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2399.0, "completions/mean_length": 1211.009033203125, "completions/mean_terminated_length": 621.6021728515625, "completions/min_length": 213.0, "completions/min_terminated_length": 213.0, "epoch": 3.7022455526392535, "grad_norm": 0.3546612858772278, "learning_rate": 1e-06, "loss": -0.0814, "num_tokens": 232225569.0, "reward": 0.5703125, "reward_std": 0.17615394294261932, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1585 }, { "clip_ratio/high_max": 0.0026163434667978436, "clip_ratio/high_mean": 0.0010972397249133792, "clip_ratio/low_mean": 0.0008886452833394287, "clip_ratio/low_min": 4.30421805504011e-05, "clip_ratio/region_mean": 0.0019858849700540304, "epoch": 3.704578594342374, "grad_norm": 0.3230176866054535, "learning_rate": 1e-06, "loss": -0.0816, "step": 1586 }, { "clip_ratio/high_max": 0.0028723169307340868, "clip_ratio/high_mean": 0.0011486682378745172, "clip_ratio/low_mean": 0.00103460737682326, "clip_ratio/low_min": 7.179314161476213e-05, "clip_ratio/region_mean": 0.0021832755810464732, "epoch": 3.7069116360454943, "grad_norm": 0.2995162308216095, "learning_rate": 1e-06, "loss": -0.0817, "step": 1587 }, { "clip_ratio/high_max": 0.0025985515967477113, "clip_ratio/high_mean": 0.0009519480572635075, "clip_ratio/low_mean": 0.0012239921852597035, "clip_ratio/low_min": 2.805626172630582e-05, "clip_ratio/region_mean": 0.0021759402225143276, "epoch": 3.7092446777486145, "grad_norm": 0.2810906171798706, "learning_rate": 1e-06, "loss": -0.0818, "step": 1588 }, { "clip_ratio/high_max": 0.002692543566809036, "clip_ratio/high_mean": 0.0009204212274198653, "clip_ratio/low_mean": 0.0006138828317716616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001534304075903492, "completions/clipped_ratio": 0.2075892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3768.0, "completions/mean_length": 1386.204345703125, "completions/mean_terminated_length": 676.3140869140625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 3.711577719451735, "grad_norm": 0.32845979928970337, "learning_rate": 1e-06, "loss": -0.0701, "num_tokens": 232796368.0, "reward": 0.5301339626312256, "reward_std": 0.14135508239269257, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 1589 }, { "clip_ratio/high_max": 0.0029532299013226293, "clip_ratio/high_mean": 0.0010062977435154608, "clip_ratio/low_mean": 0.0007842654104024405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017905630884342827, "epoch": 3.713910761154856, "grad_norm": 0.29079484939575195, "learning_rate": 1e-06, "loss": -0.0703, "step": 1590 }, { "clip_ratio/high_max": 0.0029964288987684995, "clip_ratio/high_mean": 0.0010623050002322998, "clip_ratio/low_mean": 0.0009586893656887696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002020994397753384, "epoch": 3.716243802857976, "grad_norm": 0.2608084976673126, "learning_rate": 1e-06, "loss": -0.0704, "step": 1591 }, { "clip_ratio/high_max": 0.0024345178389921784, "clip_ratio/high_mean": 0.0009444899878872093, "clip_ratio/low_mean": 0.0011719619114956004, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.002116451876645442, "epoch": 3.7185768445610963, "grad_norm": 0.281854510307312, "learning_rate": 1e-06, "loss": -0.0705, "step": 1592 }, { "clip_ratio/high_max": 0.002234755262179533, "clip_ratio/high_mean": 0.0007076127913023811, "clip_ratio/low_mean": 0.0004429770510796516, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001150589836470317, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3125.0, "completions/mean_length": 1113.5804443359375, "completions/mean_terminated_length": 589.1128540039062, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 3.720909886264217, "grad_norm": 0.2862772047519684, "learning_rate": 1e-06, "loss": -0.0584, "num_tokens": 233325464.0, "reward": 0.645089328289032, "reward_std": 0.12062598019838333, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1593 }, { "clip_ratio/high_max": 0.0031345177776529454, "clip_ratio/high_mean": 0.0010075391801365186, "clip_ratio/low_mean": 0.0006057004723061254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016132396885950584, "epoch": 3.7232429279673376, "grad_norm": 0.25686419010162354, "learning_rate": 1e-06, "loss": -0.0586, "step": 1594 }, { "clip_ratio/high_max": 0.0028180151057313196, "clip_ratio/high_mean": 0.0009742606889631134, "clip_ratio/low_mean": 0.0008117809757095529, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017860416701296344, "epoch": 3.725575969670458, "grad_norm": 0.1940649300813675, "learning_rate": 1e-06, "loss": -0.0587, "step": 1595 }, { "clip_ratio/high_max": 0.002606392386951484, "clip_ratio/high_mean": 0.0008690365175425541, "clip_ratio/low_mean": 0.0008804468684502353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017494833955424838, "epoch": 3.7279090113735784, "grad_norm": 0.26634952425956726, "learning_rate": 1e-06, "loss": -0.0588, "step": 1596 }, { "clip_ratio/high_max": 0.0021255537612887565, "clip_ratio/high_mean": 0.0009493224206380546, "clip_ratio/low_mean": 0.0006909248835427206, "clip_ratio/low_min": 1.2359106221992988e-05, "clip_ratio/region_mean": 0.0016402473047492094, "completions/clipped_ratio": 0.1662946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 1277.224365234375, "completions/mean_terminated_length": 714.9785766601562, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.7302420530766986, "grad_norm": 0.399694561958313, "learning_rate": 1e-06, "loss": -0.0923, "num_tokens": 233939537.0, "reward": 0.5602678656578064, "reward_std": 0.19261160492897034, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 1597 }, { "clip_ratio/high_max": 0.0033188991001225077, "clip_ratio/high_mean": 0.001244036258867709, "clip_ratio/low_mean": 0.0009532424633107439, "clip_ratio/low_min": 4.3214809920755215e-05, "clip_ratio/region_mean": 0.002197278750827536, "epoch": 3.7325750947798193, "grad_norm": 0.3414709270000458, "learning_rate": 1e-06, "loss": -0.0926, "step": 1598 }, { "clip_ratio/high_max": 0.0031182927050394937, "clip_ratio/high_mean": 0.0011967702739639208, "clip_ratio/low_mean": 0.0010417346588837972, "clip_ratio/low_min": 5.5573917052242905e-05, "clip_ratio/region_mean": 0.0022385049087461084, "epoch": 3.7349081364829395, "grad_norm": 0.2714030146598816, "learning_rate": 1e-06, "loss": -0.0927, "step": 1599 }, { "clip_ratio/high_max": 0.002907983485783916, "clip_ratio/high_mean": 0.0012127297868573805, "clip_ratio/low_mean": 0.001272044322831789, "clip_ratio/low_min": 3.710917189891916e-05, "clip_ratio/region_mean": 0.002484774093318265, "epoch": 3.73724117818606, "grad_norm": 0.3983018696308136, "learning_rate": 1e-06, "loss": -0.0928, "step": 1600 }, { "epoch": 3.73724117818606, "step": 1600, "total_flos": 0.0, "train_loss": -0.05325791612935007, "train_runtime": 64519.0326, "train_samples_per_second": 22.22, "train_steps_per_second": 0.025 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 233939537, "num_train_epochs": 4, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }