{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.951603498542275, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3733.0, "completions/mean_length": 614.185302734375, "completions/mean_terminated_length": 534.6917724609375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "epoch": 0.009329446064139942, "grad_norm": 0.16490904986858368, "learning_rate": 1e-06, "loss": 0.011, "num_tokens": 554894.0, "reward": 0.5334821939468384, "reward_std": 0.2727942168712616, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 1 }, { "clip_ratio/high_max": 0.0023652190211578272, "clip_ratio/high_mean": 0.0010097824670083355, "clip_ratio/low_mean": 0.0006767770846636267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001686559604422655, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2135.0, "completions/mean_length": 582.6842041015625, "completions/mean_terminated_length": 530.959228515625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.018658892128279883, "grad_norm": 0.1396065652370453, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1110987.0, "reward": 0.4654017984867096, "reward_std": 0.22120505571365356, "rewards/verify_math_reward/mean": 0.4654017984867096, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 2 }, { "clip_ratio/high_max": 0.002458824819768779, "clip_ratio/high_mean": 0.0009520601961412467, "clip_ratio/low_mean": 0.0006422844571716269, "clip_ratio/low_min": 1.4744043255632278e-05, "clip_ratio/region_mean": 0.001594344670593273, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2504.0, "completions/mean_length": 560.5580444335938, "completions/mean_terminated_length": 532.7199096679688, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "epoch": 0.027988338192419825, "grad_norm": 0.13528741896152496, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 1683119.0, "reward": 0.5212053656578064, "reward_std": 0.22019615769386292, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 3 }, { "clip_ratio/high_max": 0.002840214430761989, "clip_ratio/high_mean": 0.0011048212909372523, "clip_ratio/low_mean": 0.0006483338529506, "clip_ratio/low_min": 1.4692054719489533e-05, "clip_ratio/region_mean": 0.0017531551493448205, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3404.0, "completions/mean_length": 575.1796875, "completions/mean_terminated_length": 535.4413452148438, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.037317784256559766, "grad_norm": 0.14472410082817078, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 2236952.0, "reward": 0.5301339626312256, "reward_std": 0.25397443771362305, "rewards/verify_math_reward/mean": 0.5301339030265808, "rewards/verify_math_reward/std": 0.49936985969543457, "step": 4 }, { "clip_ratio/high_max": 0.0023668790236115456, "clip_ratio/high_mean": 0.0011392345841159113, "clip_ratio/low_mean": 0.0007667491390748182, "clip_ratio/low_min": 4.886347505816957e-05, "clip_ratio/region_mean": 0.0019059837286476977, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3572.0, "completions/mean_length": 562.0592041015625, "completions/mean_terminated_length": 534.2328491210938, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "epoch": 0.04664723032069971, "grad_norm": 0.1373066008090973, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 2795453.0, "reward": 0.504464328289032, "reward_std": 0.2638465166091919, "rewards/verify_math_reward/mean": 0.5044642686843872, "rewards/verify_math_reward/std": 0.5002593398094177, "step": 5 }, { "clip_ratio/high_max": 0.0022958667468628846, "clip_ratio/high_mean": 0.0010660433836164884, "clip_ratio/low_mean": 0.0009721827082103118, "clip_ratio/low_min": 0.00018187476507591782, "clip_ratio/region_mean": 0.0020382261063787155, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3899.0, "completions/mean_length": 616.1272583007812, "completions/mean_terminated_length": 572.8745727539062, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.05597667638483965, "grad_norm": 0.1388639509677887, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 3392871.0, "reward": 0.5022321939468384, "reward_std": 0.26367244124412537, "rewards/verify_math_reward/mean": 0.5022321343421936, "rewards/verify_math_reward/std": 0.5002743005752563, "step": 6 }, { "clip_ratio/high_max": 0.002521075904951431, "clip_ratio/high_mean": 0.0010430771653773263, "clip_ratio/low_mean": 0.000778143232309958, "clip_ratio/low_min": 2.4529329493816476e-05, "clip_ratio/region_mean": 0.0018212204086012207, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2758.0, "completions/mean_length": 563.1495971679688, "completions/mean_terminated_length": 515.1923217773438, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 0.0653061224489796, "grad_norm": 0.1458810567855835, "learning_rate": 1e-06, "loss": -0.0141, "num_tokens": 3946757.0, "reward": 0.543526828289032, "reward_std": 0.25032734870910645, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 7 }, { "clip_ratio/high_max": 0.0021492620653589256, "clip_ratio/high_mean": 0.000913873342142324, "clip_ratio/low_mean": 0.0006230521576071624, "clip_ratio/low_min": 8.898491614672821e-05, "clip_ratio/region_mean": 0.0015369254542747512, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3248.0, "completions/mean_length": 625.1741333007812, "completions/mean_terminated_length": 574.07470703125, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.07463556851311953, "grad_norm": 0.12061502039432526, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 4528057.0, "reward": 0.5167410969734192, "reward_std": 0.23930947482585907, "rewards/verify_math_reward/mean": 0.5167410969734192, "rewards/verify_math_reward/std": 0.4999987483024597, "step": 8 }, { "clip_ratio/high_max": 0.002152189288608497, "clip_ratio/high_mean": 0.0009677911584731191, "clip_ratio/low_mean": 0.000675404578942107, "clip_ratio/low_min": 5.260602483758703e-05, "clip_ratio/region_mean": 0.0016431957192253321, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2164.0, "completions/mean_length": 657.6529541015625, "completions/mean_terminated_length": 571.1040649414062, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "epoch": 0.08396501457725948, "grad_norm": 0.12534275650978088, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 5116178.0, "reward": 0.5234375, "reward_std": 0.21925115585327148, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 9 }, { "clip_ratio/high_max": 0.0025328486663056538, "clip_ratio/high_mean": 0.0010626052535371855, "clip_ratio/low_mean": 0.0007675631386518944, "clip_ratio/low_min": 5.274859086057404e-05, "clip_ratio/region_mean": 0.0018301683667232282, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 626.6986694335938, "completions/mean_terminated_length": 575.6217041015625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 0.09329446064139942, "grad_norm": 0.12725567817687988, "learning_rate": 1e-06, "loss": 0.0042, "num_tokens": 5719692.0, "reward": 0.5011160969734192, "reward_std": 0.24949504435062408, "rewards/verify_math_reward/mean": 0.5011160969734192, "rewards/verify_math_reward/std": 0.5002780556678772, "step": 10 }, { "clip_ratio/high_max": 0.0017157621405203827, "clip_ratio/high_mean": 0.000640581754851155, "clip_ratio/low_mean": 0.0005166652979369246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011572470793907996, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3596.0, "completions/mean_length": 667.1060791015625, "completions/mean_terminated_length": 584.8125610351562, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.10262390670553936, "grad_norm": 0.10232733190059662, "learning_rate": 1e-06, "loss": 0.004, "num_tokens": 6315747.0, "reward": 0.4988839626312256, "reward_std": 0.17627158761024475, "rewards/verify_math_reward/mean": 0.4988839328289032, "rewards/verify_math_reward/std": 0.5002779960632324, "step": 11 }, { "clip_ratio/high_max": 0.0024113980362017173, "clip_ratio/high_mean": 0.0008544203719793586, "clip_ratio/low_mean": 0.0004581312168738805, "clip_ratio/low_min": 1.3769552424491849e-05, "clip_ratio/region_mean": 0.001312551601586165, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 609.5145263671875, "completions/mean_terminated_length": 529.9143676757812, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 0.1119533527696793, "grad_norm": 0.11482333391904831, "learning_rate": 1e-06, "loss": 0.0026, "num_tokens": 6882432.0, "reward": 0.5345982313156128, "reward_std": 0.18442019820213318, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 12 }, { "clip_ratio/high_max": 0.002209904996561818, "clip_ratio/high_mean": 0.0009161502002825728, "clip_ratio/low_mean": 0.0005432841262518195, "clip_ratio/low_min": 2.8014343115501106e-05, "clip_ratio/region_mean": 0.0014594343301723711, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2898.0, "completions/mean_length": 630.6217041015625, "completions/mean_terminated_length": 579.6024780273438, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.12128279883381925, "grad_norm": 0.11720063537359238, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 7482213.0, "reward": 0.5625, "reward_std": 0.2191762924194336, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 13 }, { "clip_ratio/high_max": 0.002542078284022864, "clip_ratio/high_mean": 0.0010691071147448383, "clip_ratio/low_mean": 0.0006841068716312293, "clip_ratio/low_min": 2.551020406826865e-05, "clip_ratio/region_mean": 0.0017532139463583007, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3292.0, "completions/mean_length": 602.5569458007812, "completions/mean_terminated_length": 526.8722534179688, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.1306122448979592, "grad_norm": 0.13414473831653595, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 8040392.0, "reward": 0.5569196939468384, "reward_std": 0.24348580837249756, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 14 }, { "clip_ratio/high_max": 0.0017708268351270817, "clip_ratio/high_mean": 0.0006339543433568906, "clip_ratio/low_mean": 0.0003903240321960766, "clip_ratio/low_min": 1.7149128325399943e-05, "clip_ratio/region_mean": 0.001024278361001052, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 640.546875, "completions/mean_terminated_length": 573.7178344726562, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 0.13994169096209913, "grad_norm": 0.10310615599155426, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 8638154.0, "reward": 0.5613839626312256, "reward_std": 0.15480685234069824, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 15 }, { "clip_ratio/high_max": 0.0022275920855463482, "clip_ratio/high_mean": 0.0008802472275419859, "clip_ratio/low_mean": 0.0005461570026454865, "clip_ratio/low_min": 3.0013235118531156e-05, "clip_ratio/region_mean": 0.0014264042183640413, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3389.0, "completions/mean_length": 637.6796875, "completions/mean_terminated_length": 574.8011474609375, "completions/min_length": 82.0, "completions/min_terminated_length": 82.0, "epoch": 0.14927113702623906, "grad_norm": 0.12171747535467148, "learning_rate": 1e-06, "loss": 0.0037, "num_tokens": 9235443.0, "reward": 0.559151828289032, "reward_std": 0.19707919657230377, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 16 }, { "clip_ratio/high_max": 0.002297037899552379, "clip_ratio/high_mean": 0.0009401563238498056, "clip_ratio/low_mean": 0.0006466075446951436, "clip_ratio/low_min": 3.81541540264152e-05, "clip_ratio/region_mean": 0.0015867638649069704, "completions/clipped_ratio": 0.0022321428571429047, "completions/max_length": 4096.0, "completions/max_terminated_length": 2212.0, "completions/mean_length": 565.9241333007812, "completions/mean_terminated_length": 558.02685546875, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.158600583090379, "grad_norm": 0.12709392607212067, "learning_rate": 1e-06, "loss": 0.0186, "num_tokens": 9819943.0, "reward": 0.5457589626312256, "reward_std": 0.24058938026428223, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 17 }, { "clip_ratio/high_max": 0.0021680838872271124, "clip_ratio/high_mean": 0.0008981921473605325, "clip_ratio/low_mean": 0.0006542301634908654, "clip_ratio/low_min": 1.641281596675981e-05, "clip_ratio/region_mean": 0.0015524223053944297, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 634.5614013671875, "completions/mean_terminated_length": 575.6265869140625, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "epoch": 0.16793002915451896, "grad_norm": 0.12446357309818268, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 10408302.0, "reward": 0.5558035969734192, "reward_std": 0.21899083256721497, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 18 }, { "clip_ratio/high_max": 0.00250030163442716, "clip_ratio/high_mean": 0.0011050563662138302, "clip_ratio/low_mean": 0.0007577011656394461, "clip_ratio/low_min": 3.5323871998116374e-05, "clip_ratio/region_mean": 0.0018627575118443929, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3468.0, "completions/mean_length": 600.9152221679688, "completions/mean_terminated_length": 549.4586791992188, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.1772594752186589, "grad_norm": 0.13770441710948944, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 10987930.0, "reward": 0.6272321939468384, "reward_std": 0.27343112230300903, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 19 }, { "clip_ratio/high_max": 0.0021293621357472148, "clip_ratio/high_mean": 0.001092827948014019, "clip_ratio/low_mean": 0.0006099227475715452, "clip_ratio/low_min": 4.898774204775691e-05, "clip_ratio/region_mean": 0.0017027506837621331, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3080.0, "completions/mean_length": 557.0658569335938, "completions/mean_terminated_length": 525.18359375, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 0.18658892128279883, "grad_norm": 0.13200710713863373, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 11545461.0, "reward": 0.645089328289032, "reward_std": 0.22631458938121796, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 20 }, { "clip_ratio/high_max": 0.0019469308681436814, "clip_ratio/high_mean": 0.0007061947162583238, "clip_ratio/low_mean": 0.0005188142467886792, "clip_ratio/low_min": 3.999899490736425e-05, "clip_ratio/region_mean": 0.0012250089639564976, "completions/clipped_ratio": 0.008928571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3670.0, "completions/mean_length": 568.4888916015625, "completions/mean_terminated_length": 536.70947265625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.19591836734693877, "grad_norm": 0.10863891988992691, "learning_rate": 1e-06, "loss": 0.0056, "num_tokens": 12104251.0, "reward": 0.59375, "reward_std": 0.17186929285526276, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 21 }, { "clip_ratio/high_max": 0.0017850339609140065, "clip_ratio/high_mean": 0.0007677656249143183, "clip_ratio/low_mean": 0.0005274809600450681, "clip_ratio/low_min": 1.4796401956118643e-05, "clip_ratio/region_mean": 0.0012952466131537221, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 654.8203125, "completions/mean_terminated_length": 596.23046875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.20524781341107873, "grad_norm": 0.10651501268148422, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 12724962.0, "reward": 0.5212053656578064, "reward_std": 0.1947491466999054, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 22 }, { "clip_ratio/high_max": 0.0021366372529882938, "clip_ratio/high_mean": 0.0008756777497183066, "clip_ratio/low_mean": 0.0005200504274398554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013957281553302892, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 655.4241333007812, "completions/mean_terminated_length": 564.7789306640625, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 0.21457725947521866, "grad_norm": 7.990764617919922, "learning_rate": 1e-06, "loss": 0.0095, "num_tokens": 13302350.0, "reward": 0.6350446939468384, "reward_std": 0.1891171634197235, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 23 }, { "clip_ratio/high_max": 0.0021235865060589276, "clip_ratio/high_mean": 0.000820696081063943, "clip_ratio/low_mean": 0.0004587074627124821, "clip_ratio/low_min": 1.581077594892122e-05, "clip_ratio/region_mean": 0.0012794035465049092, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3345.0, "completions/mean_length": 644.8783569335938, "completions/mean_terminated_length": 590.0986328125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.2239067055393586, "grad_norm": 0.11133348941802979, "learning_rate": 1e-06, "loss": 0.0114, "num_tokens": 13907113.0, "reward": 0.5223214626312256, "reward_std": 0.19643910229206085, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 24 }, { "clip_ratio/high_max": 0.0021659615376847796, "clip_ratio/high_mean": 0.0007778075250826078, "clip_ratio/low_mean": 0.0004985564892194816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012763640042976476, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3484.0, "completions/mean_length": 669.247802734375, "completions/mean_terminated_length": 610.903564453125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 0.23323615160349853, "grad_norm": 0.1110498383641243, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 14530551.0, "reward": 0.5401785969734192, "reward_std": 0.18032734096050262, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 25 }, { "clip_ratio/high_max": 0.0020686442585429177, "clip_ratio/high_mean": 0.0008784457386354916, "clip_ratio/low_mean": 0.0005849237613801961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014633694845542777, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2760.0, "completions/mean_length": 618.2890625, "completions/mean_terminated_length": 559.0772094726562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.2425655976676385, "grad_norm": 0.12153169512748718, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 15110570.0, "reward": 0.512276828289032, "reward_std": 0.21564117074012756, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 26 }, { "clip_ratio/high_max": 0.002358836092753336, "clip_ratio/high_mean": 0.0010083036395371892, "clip_ratio/low_mean": 0.0005340912011888577, "clip_ratio/low_min": 4.825765336136101e-05, "clip_ratio/region_mean": 0.0015423948170791846, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 2541.0, "completions/mean_length": 587.0201416015625, "completions/mean_terminated_length": 547.4153442382812, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "epoch": 0.2518950437317784, "grad_norm": 0.12810222804546356, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 15683700.0, "reward": 0.5758928656578064, "reward_std": 0.2073742300271988, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 27 }, { "clip_ratio/high_max": 0.0020375716740090866, "clip_ratio/high_mean": 0.0008536390269000549, "clip_ratio/low_mean": 0.0005751541275458294, "clip_ratio/low_min": 6.23652895228588e-05, "clip_ratio/region_mean": 0.001428793155355379, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 590.7042846679688, "completions/mean_terminated_length": 543.12109375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.2612244897959184, "grad_norm": 0.1335379183292389, "learning_rate": 1e-06, "loss": 0.0076, "num_tokens": 16253811.0, "reward": 0.5546875, "reward_std": 0.21373297274112701, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 28 }, { "clip_ratio/high_max": 0.002163997258321615, "clip_ratio/high_mean": 0.0008853104918671306, "clip_ratio/low_mean": 0.0006560976398759522, "clip_ratio/low_min": 4.884715690423036e-05, "clip_ratio/region_mean": 0.001541408164484892, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 660.6160888671875, "completions/mean_terminated_length": 590.1868286132812, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.2705539358600583, "grad_norm": 0.12447265535593033, "learning_rate": 1e-06, "loss": -0.0152, "num_tokens": 16860979.0, "reward": 0.5055803656578064, "reward_std": 0.21511800587177277, "rewards/verify_math_reward/mean": 0.5055803656578064, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 29 }, { "clip_ratio/high_max": 0.0018319295995752327, "clip_ratio/high_mean": 0.000857271057611797, "clip_ratio/low_mean": 0.0006075464270907105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00146481746196514, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2588.0, "completions/mean_length": 587.84375, "completions/mean_terminated_length": 540.2217407226562, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.27988338192419826, "grad_norm": 0.12270282208919525, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 17433863.0, "reward": 0.5703125, "reward_std": 0.21109367907047272, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 30 }, { "clip_ratio/high_max": 0.0020136640159762464, "clip_ratio/high_mean": 0.0009078333459910937, "clip_ratio/low_mean": 0.0005748930125264451, "clip_ratio/low_min": 4.280285611457657e-05, "clip_ratio/region_mean": 0.0014827263585175388, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 655.3795166015625, "completions/mean_terminated_length": 616.5462646484375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 0.2892128279883382, "grad_norm": 0.11932247877120972, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 18065003.0, "reward": 0.5457589626312256, "reward_std": 0.23420287668704987, "rewards/verify_math_reward/mean": 0.5457589030265808, "rewards/verify_math_reward/std": 0.4981797933578491, "step": 31 }, { "clip_ratio/high_max": 0.002112112160830293, "clip_ratio/high_mean": 0.000947833490499761, "clip_ratio/low_mean": 0.0008170058918040013, "clip_ratio/low_min": 4.710533085017232e-05, "clip_ratio/region_mean": 0.0017648393550189212, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3921.0, "completions/mean_length": 599.8381958007812, "completions/mean_terminated_length": 552.3789672851562, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.29854227405247813, "grad_norm": 0.13691404461860657, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 18644146.0, "reward": 0.5625, "reward_std": 0.24716567993164062, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 32 }, { "clip_ratio/high_max": 0.002308595256181434, "clip_ratio/high_mean": 0.0008604364102211548, "clip_ratio/low_mean": 0.0007807903384673409, "clip_ratio/low_min": 7.305674625968095e-05, "clip_ratio/region_mean": 0.0016412267432315275, "completions/clipped_ratio": 0.005580357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 547.2835083007812, "completions/mean_terminated_length": 527.3692626953125, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.30787172011661806, "grad_norm": 0.1319669634103775, "learning_rate": 1e-06, "loss": 0.0229, "num_tokens": 19200568.0, "reward": 0.5334821939468384, "reward_std": 0.23101183772087097, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 33 }, { "clip_ratio/high_max": 0.0020851020599366166, "clip_ratio/high_mean": 0.0008397549772780621, "clip_ratio/low_mean": 0.0007184331880125683, "clip_ratio/low_min": 1.4137072867015377e-05, "clip_ratio/region_mean": 0.0015581881743855774, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 639.59375, "completions/mean_terminated_length": 572.7462768554688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 0.317201166180758, "grad_norm": 0.1240430399775505, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 19787404.0, "reward": 0.546875, "reward_std": 0.21538084745407104, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 34 }, { "clip_ratio/high_max": 0.0017645746302150656, "clip_ratio/high_mean": 0.0007314048471016577, "clip_ratio/low_mean": 0.0006384962380252546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013699011033168063, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3674.0, "completions/mean_length": 615.6574096679688, "completions/mean_terminated_length": 572.3988647460938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.32653061224489793, "grad_norm": 0.12025720626115799, "learning_rate": 1e-06, "loss": 0.0258, "num_tokens": 20377633.0, "reward": 0.59375, "reward_std": 0.1985117793083191, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 35 }, { "clip_ratio/high_max": 0.0021113943730597384, "clip_ratio/high_mean": 0.0008957727495726431, "clip_ratio/low_mean": 0.0006973784638830693, "clip_ratio/low_min": 2.8803593522752635e-05, "clip_ratio/region_mean": 0.0015931512170936912, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3386.0, "completions/mean_length": 646.8560791015625, "completions/mean_terminated_length": 592.1077270507812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.3358600583090379, "grad_norm": 0.11791989207267761, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 20992696.0, "reward": 0.5345982313156128, "reward_std": 0.22074860334396362, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 36 }, { "clip_ratio/high_max": 0.0023939193997648545, "clip_ratio/high_mean": 0.0010083259876410011, "clip_ratio/low_mean": 0.0005839945970365079, "clip_ratio/low_min": 1.840942604758311e-05, "clip_ratio/region_mean": 0.0015923206083243713, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 611.78125, "completions/mean_terminated_length": 548.4318237304688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.34518950437317786, "grad_norm": 0.12318096309900284, "learning_rate": 1e-06, "loss": 0.01, "num_tokens": 21552044.0, "reward": 0.5535714626312256, "reward_std": 0.20275256037712097, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 37 }, { "clip_ratio/high_max": 0.0021015226702729706, "clip_ratio/high_mean": 0.0008463711510557914, "clip_ratio/low_mean": 0.0005464616351673612, "clip_ratio/low_min": 4.9030876652977895e-05, "clip_ratio/region_mean": 0.001392832778947195, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 620.0279541015625, "completions/mean_terminated_length": 560.8456420898438, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 0.3545189504373178, "grad_norm": 0.11765076965093613, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 22135485.0, "reward": 0.543526828289032, "reward_std": 0.20069055259227753, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 38 }, { "clip_ratio/high_max": 0.0019079652411164716, "clip_ratio/high_mean": 0.000800685964350123, "clip_ratio/low_mean": 0.0005495621308000409, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013502480833267327, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2939.0, "completions/mean_length": 594.443115234375, "completions/mean_terminated_length": 550.9208984375, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "epoch": 0.3638483965014577, "grad_norm": 0.12217120081186295, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 22718914.0, "reward": 0.53125, "reward_std": 0.21079127490520477, "rewards/verify_math_reward/mean": 0.53125, "rewards/verify_math_reward/std": 0.4993011951446533, "step": 39 }, { "clip_ratio/high_max": 0.0018332536146772327, "clip_ratio/high_mean": 0.0008255630032181216, "clip_ratio/low_mean": 0.0006326256452666712, "clip_ratio/low_min": 5.133819468028378e-05, "clip_ratio/region_mean": 0.0014581886134692468, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2932.0, "completions/mean_length": 604.0971069335938, "completions/mean_terminated_length": 556.6957397460938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.37317784256559766, "grad_norm": 0.11948627978563309, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 23313385.0, "reward": 0.5267857313156128, "reward_std": 0.2077540010213852, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 40 }, { "clip_ratio/high_max": 0.0022066068850108422, "clip_ratio/high_mean": 0.0007734388218523236, "clip_ratio/low_mean": 0.0008182028213923331, "clip_ratio/low_min": 9.753470476425719e-05, "clip_ratio/region_mean": 0.0015916416450636461, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3936.0, "completions/mean_length": 679.4342041015625, "completions/mean_terminated_length": 609.3906860351562, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.3825072886297376, "grad_norm": 0.11911804974079132, "learning_rate": 1e-06, "loss": 0.0065, "num_tokens": 23950214.0, "reward": 0.5, "reward_std": 0.21640115976333618, "rewards/verify_math_reward/mean": 0.5, "rewards/verify_math_reward/std": 0.5002792477607727, "step": 41 }, { "clip_ratio/high_max": 0.001551356423078687, "clip_ratio/high_mean": 0.0005771869509771932, "clip_ratio/low_mean": 0.00043534704309422523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010125339940714184, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3311.0, "completions/mean_length": 589.7332763671875, "completions/mean_terminated_length": 554.1566772460938, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 0.39183673469387753, "grad_norm": 0.1045951321721077, "learning_rate": 1e-06, "loss": -0.0094, "num_tokens": 24532951.0, "reward": 0.5290178656578064, "reward_std": 0.155109241604805, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 42 }, { "clip_ratio/high_max": 0.0019523338414728642, "clip_ratio/high_mean": 0.0007970899187057512, "clip_ratio/low_mean": 0.0004689718434747192, "clip_ratio/low_min": 2.1147014194866642e-05, "clip_ratio/region_mean": 0.0012660617976507638, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3754.0, "completions/mean_length": 601.4252319335938, "completions/mean_terminated_length": 557.9898681640625, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 0.40116618075801747, "grad_norm": 0.12115523964166641, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 25112908.0, "reward": 0.5758928656578064, "reward_std": 0.19962720572948456, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448275566101074, "step": 43 }, { "clip_ratio/high_max": 0.00213483496918343, "clip_ratio/high_mean": 0.0008525035227648914, "clip_ratio/low_mean": 0.0005053669019616791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013578704274550546, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1779.0, "completions/mean_length": 538.3817138671875, "completions/mean_terminated_length": 498.2279968261719, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.41049562682215746, "grad_norm": 0.1257464438676834, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 25640530.0, "reward": 0.6116071939468384, "reward_std": 0.1928277462720871, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 44 }, { "clip_ratio/high_max": 0.0021022484070272185, "clip_ratio/high_mean": 0.0008700824600964552, "clip_ratio/low_mean": 0.0005214876373429433, "clip_ratio/low_min": 2.5895944418152794e-05, "clip_ratio/region_mean": 0.0013915700983488932, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2456.0, "completions/mean_length": 639.310302734375, "completions/mean_terminated_length": 576.4613647460938, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.4198250728862974, "grad_norm": 0.11802743375301361, "learning_rate": 1e-06, "loss": 0.0146, "num_tokens": 26241208.0, "reward": 0.578125, "reward_std": 0.20256711542606354, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 45 }, { "clip_ratio/high_max": 0.0019054479635087773, "clip_ratio/high_mean": 0.0008108138445095392, "clip_ratio/low_mean": 0.0006439466033043573, "clip_ratio/low_min": 3.09958895741147e-05, "clip_ratio/region_mean": 0.0014547604660037905, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2816.0, "completions/mean_length": 676.0971069335938, "completions/mean_terminated_length": 613.9170532226562, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.4291545189504373, "grad_norm": 0.11587009578943253, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 26869903.0, "reward": 0.5636160969734192, "reward_std": 0.22180238366127014, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 46 }, { "clip_ratio/high_max": 0.001860423286416335, "clip_ratio/high_mean": 0.000731741974959732, "clip_ratio/low_mean": 0.0005995130404699012, "clip_ratio/low_min": 3.8435670830949675e-05, "clip_ratio/region_mean": 0.001331254985416308, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2903.0, "completions/mean_length": 643.1373291015625, "completions/mean_terminated_length": 592.3023681640625, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 0.43848396501457726, "grad_norm": 0.11640927940607071, "learning_rate": 1e-06, "loss": 0.0099, "num_tokens": 27477802.0, "reward": 0.5569196939468384, "reward_std": 0.1954626888036728, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 47 }, { "clip_ratio/high_max": 0.0017458675538364332, "clip_ratio/high_mean": 0.0006616584932999103, "clip_ratio/low_mean": 0.0005689935733244056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012306520802667364, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 660.2277221679688, "completions/mean_terminated_length": 557.5494384765625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.4478134110787172, "grad_norm": 0.12104179710149765, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 28057518.0, "reward": 0.535714328289032, "reward_std": 0.17634011805057526, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 48 }, { "clip_ratio/high_max": 0.002319449369679205, "clip_ratio/high_mean": 0.0009525951099931262, "clip_ratio/low_mean": 0.000717777469617431, "clip_ratio/low_min": 1.9379844161449e-05, "clip_ratio/region_mean": 0.0016703725850675255, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1987.0, "completions/mean_length": 606.1082763671875, "completions/mean_terminated_length": 558.7341918945312, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 0.45714285714285713, "grad_norm": 0.12610645592212677, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 28634455.0, "reward": 0.515625, "reward_std": 0.2249245047569275, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 49 }, { "clip_ratio/high_max": 0.0022506101013277657, "clip_ratio/high_mean": 0.0008784776182437781, "clip_ratio/low_mean": 0.0006992462167545455, "clip_ratio/low_min": 2.485473578417441e-05, "clip_ratio/region_mean": 0.0015777238113514613, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3814.0, "completions/mean_length": 663.341552734375, "completions/mean_terminated_length": 548.5236206054688, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "epoch": 0.46647230320699706, "grad_norm": 0.13359029591083527, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 29212561.0, "reward": 0.5234375, "reward_std": 0.22292782366275787, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 50 }, { "clip_ratio/high_max": 0.002292481505719479, "clip_ratio/high_mean": 0.0009920787779265083, "clip_ratio/low_mean": 0.0006807221616327297, "clip_ratio/low_min": 2.8170768928248435e-05, "clip_ratio/region_mean": 0.0016728009868529625, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3475.0, "completions/mean_length": 657.482177734375, "completions/mean_terminated_length": 562.843994140625, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 0.47580174927113705, "grad_norm": 0.13377641141414642, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 29784569.0, "reward": 0.590401828289032, "reward_std": 0.2197408229112625, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 51 }, { "clip_ratio/high_max": 0.002113297443429474, "clip_ratio/high_mean": 0.0008390244620386511, "clip_ratio/low_mean": 0.0005232069088378921, "clip_ratio/low_min": 4.2957304685842246e-05, "clip_ratio/region_mean": 0.001362231374514522, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 604.1261596679688, "completions/mean_terminated_length": 536.5927124023438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.485131195335277, "grad_norm": 0.12396257370710373, "learning_rate": 1e-06, "loss": 0.0099, "num_tokens": 30335922.0, "reward": 0.6149553656578064, "reward_std": 0.1970124989748001, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 52 }, { "clip_ratio/high_max": 0.0019683467708091484, "clip_ratio/high_mean": 0.0008121754190142383, "clip_ratio/low_mean": 0.0005306686509811698, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001342844061582582, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 594.0513916015625, "completions/mean_terminated_length": 514.09814453125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.4944606413994169, "grad_norm": 0.11551974713802338, "learning_rate": 1e-06, "loss": 0.0059, "num_tokens": 30872024.0, "reward": 0.5736607313156128, "reward_std": 0.18309611082077026, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 53 }, { "clip_ratio/high_max": 0.0018420130618324038, "clip_ratio/high_mean": 0.0008389095346501563, "clip_ratio/low_mean": 0.0005182864852031344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013571960516856052, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2842.0, "completions/mean_length": 557.5011596679688, "completions/mean_terminated_length": 489.06597900390625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.5037900874635568, "grad_norm": 0.1274426281452179, "learning_rate": 1e-06, "loss": 0.0165, "num_tokens": 31391753.0, "reward": 0.5926339626312256, "reward_std": 0.19050613045692444, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 54 }, { "clip_ratio/high_max": 0.0024072310770861804, "clip_ratio/high_mean": 0.0009133650200965349, "clip_ratio/low_mean": 0.0006398809582606191, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001553245987452101, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 1989.0, "completions/mean_length": 547.6495971679688, "completions/mean_terminated_length": 507.6004638671875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.5131195335276968, "grad_norm": 0.13147826492786407, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 31935879.0, "reward": 0.598214328289032, "reward_std": 0.202678382396698, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 55 }, { "clip_ratio/high_max": 0.002112068636051845, "clip_ratio/high_mean": 0.0007095787332218606, "clip_ratio/low_mean": 0.0006021747067279648, "clip_ratio/low_min": 3.432239009271143e-05, "clip_ratio/region_mean": 0.0013117534545017406, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2280.0, "completions/mean_length": 602.078125, "completions/mean_terminated_length": 546.6190795898438, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.5224489795918368, "grad_norm": 0.12339483201503754, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 32509405.0, "reward": 0.527901828289032, "reward_std": 0.19297927618026733, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 56 }, { "clip_ratio/high_max": 0.002014315490669105, "clip_ratio/high_mean": 0.000809934706921922, "clip_ratio/low_mean": 0.0005260051398181531, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013359398253669497, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2168.0, "completions/mean_length": 570.6495971679688, "completions/mean_terminated_length": 506.55224609375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.5317784256559767, "grad_norm": 0.12417443841695786, "learning_rate": 1e-06, "loss": 0.0103, "num_tokens": 33041923.0, "reward": 0.5892857313156128, "reward_std": 0.18193607032299042, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 57 }, { "clip_ratio/high_max": 0.0021554861014010385, "clip_ratio/high_mean": 0.0008511697506037308, "clip_ratio/low_mean": 0.0005805591317766812, "clip_ratio/low_min": 3.1049397875904106e-05, "clip_ratio/region_mean": 0.0014317288696474861, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 604.8136596679688, "completions/mean_terminated_length": 537.2935180664062, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 0.5411078717201167, "grad_norm": 0.12504227459430695, "learning_rate": 1e-06, "loss": 0.016, "num_tokens": 33604068.0, "reward": 0.5491071939468384, "reward_std": 0.222071573138237, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 58 }, { "clip_ratio/high_max": 0.0017629893991397694, "clip_ratio/high_mean": 0.0006641444506385596, "clip_ratio/low_mean": 0.0005931061241426505, "clip_ratio/low_min": 4.5553937525255606e-05, "clip_ratio/region_mean": 0.0012572505911521148, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4023.0, "completions/mean_length": 594.6607666015625, "completions/mean_terminated_length": 531.0, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.5504373177842565, "grad_norm": 0.16546018421649933, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 34165492.0, "reward": 0.5814732313156128, "reward_std": 0.15947312116622925, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 59 }, { "clip_ratio/high_max": 0.0016925646559684537, "clip_ratio/high_mean": 0.000671632211378892, "clip_ratio/low_mean": 0.000527624249116343, "clip_ratio/low_min": 1.2502500794653315e-05, "clip_ratio/region_mean": 0.0011992564395768568, "completions/clipped_ratio": 0.010044642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2217.0, "completions/mean_length": 555.5413208007812, "completions/mean_terminated_length": 519.6177978515625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.5597667638483965, "grad_norm": 0.821980893611908, "learning_rate": 1e-06, "loss": -0.0153, "num_tokens": 34711401.0, "reward": 0.5770089626312256, "reward_std": 0.15878772735595703, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 60 }, { "clip_ratio/high_max": 0.002033336590102408, "clip_ratio/high_mean": 0.0007129043224267662, "clip_ratio/low_mean": 0.0005907068980377517, "clip_ratio/low_min": 1.575299393152818e-05, "clip_ratio/region_mean": 0.0013036112177360337, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3533.0, "completions/mean_length": 670.654052734375, "completions/mean_terminated_length": 576.37841796875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 0.5690962099125364, "grad_norm": 0.11143632978200912, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 35298027.0, "reward": 0.5245535969734192, "reward_std": 0.19756564497947693, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756911277771, "step": 61 }, { "clip_ratio/high_max": 0.002405178027402144, "clip_ratio/high_mean": 0.001002484841592377, "clip_ratio/low_mean": 0.0007505635776396957, "clip_ratio/low_min": 6.556402513524517e-05, "clip_ratio/region_mean": 0.0017530484037706628, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3813.0, "completions/mean_length": 652.1495971679688, "completions/mean_terminated_length": 589.5340576171875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 0.5784256559766764, "grad_norm": 0.13426145911216736, "learning_rate": 1e-06, "loss": 0.0179, "num_tokens": 35900785.0, "reward": 0.515625, "reward_std": 0.2607671618461609, "rewards/verify_math_reward/mean": 0.515625, "rewards/verify_math_reward/std": 0.5000349283218384, "step": 62 }, { "clip_ratio/high_max": 0.002157864728360437, "clip_ratio/high_mean": 0.0008281393202196341, "clip_ratio/low_mean": 0.0005194035911699757, "clip_ratio/low_min": 1.8590124454931356e-05, "clip_ratio/region_mean": 0.0013475428931997158, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2297.0, "completions/mean_length": 580.6674194335938, "completions/mean_terminated_length": 528.9127807617188, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "epoch": 0.5877551020408164, "grad_norm": 0.12360631674528122, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 36446471.0, "reward": 0.5848214626312256, "reward_std": 0.20023591816425323, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 63 }, { "clip_ratio/high_max": 0.0023264320770977065, "clip_ratio/high_mean": 0.0009458195745537523, "clip_ratio/low_mean": 0.0006299283477346762, "clip_ratio/low_min": 1.771792994986754e-05, "clip_ratio/region_mean": 0.0015757478977320716, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 632.6730346679688, "completions/mean_terminated_length": 553.6015625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.5970845481049563, "grad_norm": 0.13188877701759338, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 37021194.0, "reward": 0.6261160969734192, "reward_std": 0.20310094952583313, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 64 }, { "clip_ratio/high_max": 0.001885223107819911, "clip_ratio/high_mean": 0.0006958433959880495, "clip_ratio/low_mean": 0.0005695299187209457, "clip_ratio/low_min": 6.910875708854292e-05, "clip_ratio/region_mean": 0.001265373342903331, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3799.0, "completions/mean_length": 609.9017944335938, "completions/mean_terminated_length": 570.5552978515625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.6064139941690962, "grad_norm": 0.12055953592061996, "learning_rate": 1e-06, "loss": 0.0326, "num_tokens": 37624386.0, "reward": 0.5491071939468384, "reward_std": 0.17585155367851257, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 65 }, { "clip_ratio/high_max": 0.0020190873547107913, "clip_ratio/high_mean": 0.0008522569078195374, "clip_ratio/low_mean": 0.0006081984229240334, "clip_ratio/low_min": 3.8328054870362394e-05, "clip_ratio/region_mean": 0.0014604553034587298, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3076.0, "completions/mean_length": 584.3125, "completions/mean_terminated_length": 524.5221557617188, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.6157434402332361, "grad_norm": 0.12680187821388245, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 38185026.0, "reward": 0.5915178656578064, "reward_std": 0.2031630128622055, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 66 }, { "clip_ratio/high_max": 0.0018833797148545273, "clip_ratio/high_mean": 0.0007615654922119575, "clip_ratio/low_mean": 0.0005263847488095053, "clip_ratio/low_min": 1.594794593984261e-05, "clip_ratio/region_mean": 0.0012879502428404521, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3643.0, "completions/mean_length": 676.0111694335938, "completions/mean_terminated_length": 617.7821044921875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.6250728862973761, "grad_norm": 0.10803545266389847, "learning_rate": 1e-06, "loss": -0.0159, "num_tokens": 38824964.0, "reward": 0.5334821939468384, "reward_std": 0.1932484656572342, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 67 }, { "clip_ratio/high_max": 0.002362093233386986, "clip_ratio/high_mean": 0.00093513973752124, "clip_ratio/low_mean": 0.0006717547585139982, "clip_ratio/low_min": 8.071445336099714e-05, "clip_ratio/region_mean": 0.001606894515134627, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2535.0, "completions/mean_length": 593.169677734375, "completions/mean_terminated_length": 545.6199340820312, "completions/min_length": 5.0, "completions/min_terminated_length": 5.0, "epoch": 0.634402332361516, "grad_norm": 0.12663999199867249, "learning_rate": 1e-06, "loss": 0.014, "num_tokens": 39390916.0, "reward": 0.5580357313156128, "reward_std": 0.19655926525592804, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 68 }, { "clip_ratio/high_max": 0.0027452273498056456, "clip_ratio/high_mean": 0.0010903024740400724, "clip_ratio/low_mean": 0.0006977858447498875, "clip_ratio/low_min": 2.55620425377856e-05, "clip_ratio/region_mean": 0.0017880883387988433, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3469.0, "completions/mean_length": 686.4498291015625, "completions/mean_terminated_length": 592.6089477539062, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 0.643731778425656, "grad_norm": 0.13761483132839203, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 39998039.0, "reward": 0.5580357313156128, "reward_std": 0.2650946080684662, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 69 }, { "clip_ratio/high_max": 0.0020036659843754023, "clip_ratio/high_mean": 0.0008543112780898809, "clip_ratio/low_mean": 0.0006879072552692378, "clip_ratio/low_min": 3.6289940908318385e-05, "clip_ratio/region_mean": 0.0015422185533680022, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 638.3660888671875, "completions/mean_terminated_length": 579.4960327148438, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 0.6530612244897959, "grad_norm": 0.1263047307729721, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 40598615.0, "reward": 0.543526828289032, "reward_std": 0.22634737193584442, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838003516197205, "step": 70 }, { "clip_ratio/high_max": 0.0018194154508819338, "clip_ratio/high_mean": 0.0006804778440709924, "clip_ratio/low_mean": 0.0007111599461495643, "clip_ratio/low_min": 6.525953904201742e-05, "clip_ratio/region_mean": 0.0013916377938585356, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 594.4263916015625, "completions/mean_terminated_length": 550.9039916992188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 0.6623906705539359, "grad_norm": 0.12367860972881317, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 41185749.0, "reward": 0.5491071939468384, "reward_std": 0.19320890307426453, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 71 }, { "clip_ratio/high_max": 0.00217145054921275, "clip_ratio/high_mean": 0.0007920087136881193, "clip_ratio/low_mean": 0.0006000880857754964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001392096768540796, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3970.0, "completions/mean_length": 614.833740234375, "completions/mean_terminated_length": 563.5820922851562, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 0.6717201166180758, "grad_norm": 0.1266845017671585, "learning_rate": 1e-06, "loss": 0.0054, "num_tokens": 41780832.0, "reward": 0.5267857313156128, "reward_std": 0.19486747682094574, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 72 }, { "clip_ratio/high_max": 0.0018032259722531307, "clip_ratio/high_mean": 0.0006822762316005537, "clip_ratio/low_mean": 0.0005624001751129981, "clip_ratio/low_min": 3.8722011595382355e-05, "clip_ratio/region_mean": 0.0012446763867046684, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3378.0, "completions/mean_length": 649.0892944335938, "completions/mean_terminated_length": 570.3927001953125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.6810495626822157, "grad_norm": 0.11287780106067657, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 42377272.0, "reward": 0.5558035969734192, "reward_std": 0.1748398393392563, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 73 }, { "clip_ratio/high_max": 0.0022270442932494916, "clip_ratio/high_mean": 0.000858560995766311, "clip_ratio/low_mean": 0.000592833250266267, "clip_ratio/low_min": 3.8368839341274e-05, "clip_ratio/region_mean": 0.001451394236937631, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3249.0, "completions/mean_length": 677.068115234375, "completions/mean_terminated_length": 591.0079956054688, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 0.6903790087463557, "grad_norm": 0.12197376787662506, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 42983517.0, "reward": 0.5613839626312256, "reward_std": 0.20771123468875885, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 74 }, { "clip_ratio/high_max": 0.0016667769705236424, "clip_ratio/high_mean": 0.0006762091416021576, "clip_ratio/low_mean": 0.0005556680225708988, "clip_ratio/low_min": 3.057032972719753e-05, "clip_ratio/region_mean": 0.0012318771368882153, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 650.880615234375, "completions/mean_terminated_length": 596.1961669921875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.6997084548104956, "grad_norm": 0.11935737729072571, "learning_rate": 1e-06, "loss": 0.0118, "num_tokens": 43594426.0, "reward": 0.5267857313156128, "reward_std": 0.18881477415561676, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 75 }, { "clip_ratio/high_max": 0.0020895929555990733, "clip_ratio/high_mean": 0.0009295289673900697, "clip_ratio/low_mean": 0.0007338959840126336, "clip_ratio/low_min": 8.660398543725023e-05, "clip_ratio/region_mean": 0.0016634249477647245, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3594.0, "completions/mean_length": 680.5892944335938, "completions/mean_terminated_length": 602.6118774414062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 0.7090379008746356, "grad_norm": 0.13902558386325836, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 44215554.0, "reward": 0.5234375, "reward_std": 0.2520551383495331, "rewards/verify_math_reward/mean": 0.5234375, "rewards/verify_math_reward/std": 0.49972933530807495, "step": 76 }, { "clip_ratio/high_max": 0.0023232634193846025, "clip_ratio/high_mean": 0.0009882602316793054, "clip_ratio/low_mean": 0.0006640208102908218, "clip_ratio/low_min": 6.0430887060647365e-05, "clip_ratio/region_mean": 0.001652281036513159, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2344.0, "completions/mean_length": 647.7913208007812, "completions/mean_terminated_length": 548.818603515625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.7183673469387755, "grad_norm": 0.13488072156906128, "learning_rate": 1e-06, "loss": -0.0077, "num_tokens": 44785199.0, "reward": 0.5625, "reward_std": 0.2246202826499939, "rewards/verify_math_reward/mean": 0.5625, "rewards/verify_math_reward/std": 0.49635544419288635, "step": 77 }, { "clip_ratio/high_max": 0.002175509653170593, "clip_ratio/high_mean": 0.0008506925369147211, "clip_ratio/low_mean": 0.0005729131144107669, "clip_ratio/low_min": 1.6382700778194703e-05, "clip_ratio/region_mean": 0.001423605666786898, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3663.0, "completions/mean_length": 616.036865234375, "completions/mean_terminated_length": 556.78662109375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 0.7276967930029155, "grad_norm": 0.1300690472126007, "learning_rate": 1e-06, "loss": 0.007, "num_tokens": 45361640.0, "reward": 0.5970982313156128, "reward_std": 0.1929485946893692, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 78 }, { "clip_ratio/high_max": 0.002026535803452134, "clip_ratio/high_mean": 0.0008791490945441183, "clip_ratio/low_mean": 0.0005753004415964824, "clip_ratio/low_min": 1.3280918210512027e-05, "clip_ratio/region_mean": 0.0014544495243171696, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 592.9308471679688, "completions/mean_terminated_length": 533.2871704101562, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 0.7370262390670554, "grad_norm": 0.13578945398330688, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 45920554.0, "reward": 0.5870535969734192, "reward_std": 0.21278755366802216, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 79 }, { "clip_ratio/high_max": 0.001978152969968505, "clip_ratio/high_mean": 0.0009649189232732169, "clip_ratio/low_mean": 0.0005753234981966671, "clip_ratio/low_min": 2.4118670808093157e-05, "clip_ratio/region_mean": 0.0015402423960040323, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 1958.0, "completions/mean_length": 571.3772583007812, "completions/mean_terminated_length": 519.48583984375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.7463556851311953, "grad_norm": 0.14063102006912231, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 46469308.0, "reward": 0.6127232313156128, "reward_std": 0.22371943295001984, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 80 }, { "clip_ratio/high_max": 0.001734588273393456, "clip_ratio/high_mean": 0.0006513315038318979, "clip_ratio/low_mean": 0.000669722448947141, "clip_ratio/low_min": 2.6404732125229202e-05, "clip_ratio/region_mean": 0.0013210539618739858, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 658.5089721679688, "completions/mean_terminated_length": 592.0272827148438, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.7556851311953353, "grad_norm": 0.12003110349178314, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 47085516.0, "reward": 0.546875, "reward_std": 0.20770801603794098, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 81 }, { "clip_ratio/high_max": 0.0021562257752520964, "clip_ratio/high_mean": 0.000907462617760757, "clip_ratio/low_mean": 0.0005941133849773905, "clip_ratio/low_min": 3.771137744479347e-05, "clip_ratio/region_mean": 0.0015015759781817906, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3714.0, "completions/mean_length": 661.5949096679688, "completions/mean_terminated_length": 583.1837768554688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 0.7650145772594752, "grad_norm": 0.11883300542831421, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 47690553.0, "reward": 0.5814732313156128, "reward_std": 0.1907336413860321, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 82 }, { "clip_ratio/high_max": 0.0022070456616347656, "clip_ratio/high_mean": 0.0007432409220200498, "clip_ratio/low_mean": 0.0005474028639582684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012906437877973076, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2399.0, "completions/mean_length": 606.53125, "completions/mean_terminated_length": 551.1428833007812, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 0.7743440233236152, "grad_norm": 0.1223452165722847, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 48261877.0, "reward": 0.640625, "reward_std": 0.19531255960464478, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 83 }, { "clip_ratio/high_max": 0.0021039797793491744, "clip_ratio/high_mean": 0.0008076017456914997, "clip_ratio/low_mean": 0.0006205403151398059, "clip_ratio/low_min": 4.76356472063344e-05, "clip_ratio/region_mean": 0.0014281420735642314, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 655.1707763671875, "completions/mean_terminated_length": 560.468994140625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.7836734693877551, "grad_norm": 0.13013124465942383, "learning_rate": 1e-06, "loss": -0.0138, "num_tokens": 48846222.0, "reward": 0.4933035969734192, "reward_std": 0.21158543229103088, "rewards/verify_math_reward/mean": 0.4933035671710968, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 84 }, { "clip_ratio/high_max": 0.0019734797679120675, "clip_ratio/high_mean": 0.0008554749056202127, "clip_ratio/low_mean": 0.0004858352594965254, "clip_ratio/low_min": 2.2116064428701065e-05, "clip_ratio/region_mean": 0.0013413101914920844, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3247.0, "completions/mean_length": 645.2779541015625, "completions/mean_terminated_length": 550.3038940429688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.793002915451895, "grad_norm": 0.12221498042345047, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 49415815.0, "reward": 0.5658482313156128, "reward_std": 0.1856580227613449, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 85 }, { "clip_ratio/high_max": 0.0016927571450651158, "clip_ratio/high_mean": 0.0006764325917174574, "clip_ratio/low_mean": 0.00042965691227436764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011060894648835529, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2791.0, "completions/mean_length": 596.411865234375, "completions/mean_terminated_length": 568.8560180664062, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.8023323615160349, "grad_norm": 0.10956127196550369, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 50008392.0, "reward": 0.6149553656578064, "reward_std": 0.16863587498664856, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 86 }, { "clip_ratio/high_max": 0.002186433153838152, "clip_ratio/high_mean": 0.0009657641330704791, "clip_ratio/low_mean": 0.0005564167249758611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015221808716887608, "completions/clipped_ratio": 0.006696428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3365.0, "completions/mean_length": 554.3392944335938, "completions/mean_terminated_length": 530.4629516601562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.8116618075801749, "grad_norm": 0.13649345934391022, "learning_rate": 1e-06, "loss": 0.0163, "num_tokens": 50561168.0, "reward": 0.6238839626312256, "reward_std": 0.19629782438278198, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 87 }, { "clip_ratio/high_max": 0.00287763597589219, "clip_ratio/high_mean": 0.0009845189670159016, "clip_ratio/low_mean": 0.0007092171417752979, "clip_ratio/low_min": 5.837240314576775e-05, "clip_ratio/region_mean": 0.0016937361142481677, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 591.216552734375, "completions/mean_terminated_length": 519.364501953125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.8209912536443149, "grad_norm": 0.1409979611635208, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 51097242.0, "reward": 0.6283482313156128, "reward_std": 0.18588557839393616, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 88 }, { "clip_ratio/high_max": 0.002211398081271909, "clip_ratio/high_mean": 0.0008753508591325954, "clip_ratio/low_mean": 0.0005992916412651539, "clip_ratio/low_min": 2.312245669600088e-05, "clip_ratio/region_mean": 0.0014746425076737069, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2010.0, "completions/mean_length": 615.833740234375, "completions/mean_terminated_length": 560.5929565429688, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.8303206997084548, "grad_norm": 0.13437823951244354, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 51693533.0, "reward": 0.5178571939468384, "reward_std": 0.1976105272769928, "rewards/verify_math_reward/mean": 0.5178571343421936, "rewards/verify_math_reward/std": 0.4999600946903229, "step": 89 }, { "clip_ratio/high_max": 0.0019194139240426011, "clip_ratio/high_mean": 0.0007599462496727938, "clip_ratio/low_mean": 0.0006348003917082679, "clip_ratio/low_min": 2.6421475922688842e-05, "clip_ratio/region_mean": 0.001394746661389945, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3274.0, "completions/mean_length": 638.2377319335938, "completions/mean_terminated_length": 567.3496704101562, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 0.8396501457725948, "grad_norm": 0.13324740529060364, "learning_rate": 1e-06, "loss": 0.0081, "num_tokens": 52279218.0, "reward": 0.551339328289032, "reward_std": 0.20012825727462769, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 90 }, { "clip_ratio/high_max": 0.0021048882190370932, "clip_ratio/high_mean": 0.000845737680720049, "clip_ratio/low_mean": 0.0006471639298979426, "clip_ratio/low_min": 2.8259139980946202e-05, "clip_ratio/region_mean": 0.0014929016251699068, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 615.5725708007812, "completions/mean_terminated_length": 548.260498046875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.8489795918367347, "grad_norm": 0.14451470971107483, "learning_rate": 1e-06, "loss": 0.0146, "num_tokens": 52849099.0, "reward": 0.59375, "reward_std": 0.19444282352924347, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 91 }, { "clip_ratio/high_max": 0.001944730418472318, "clip_ratio/high_mean": 0.000814591403468512, "clip_ratio/low_mean": 0.00047515574715362163, "clip_ratio/low_min": 1.359582347504329e-05, "clip_ratio/region_mean": 0.0012897471751784906, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3802.0, "completions/mean_length": 650.4029541015625, "completions/mean_terminated_length": 571.7362670898438, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.8583090379008746, "grad_norm": 0.12525710463523865, "learning_rate": 1e-06, "loss": -0.0175, "num_tokens": 53437020.0, "reward": 0.6238839626312256, "reward_std": 0.18904298543930054, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 92 }, { "clip_ratio/high_max": 0.002262782698380761, "clip_ratio/high_mean": 0.001004584528345731, "clip_ratio/low_mean": 0.0007672012488910696, "clip_ratio/low_min": 5.998021788400365e-05, "clip_ratio/region_mean": 0.0017717857917887159, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3590.0, "completions/mean_length": 597.5491333007812, "completions/mean_terminated_length": 546.0430297851562, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 0.8676384839650145, "grad_norm": 0.1381644457578659, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 54016712.0, "reward": 0.5837053656578064, "reward_std": 0.25426867604255676, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 93 }, { "clip_ratio/high_max": 0.0018785912179737352, "clip_ratio/high_mean": 0.0008331616409122944, "clip_ratio/low_mean": 0.0005843826238560723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001417544270225335, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2449.0, "completions/mean_length": 607.6015625, "completions/mean_terminated_length": 556.2434692382812, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 0.8769679300291545, "grad_norm": 0.12567032873630524, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 54584627.0, "reward": 0.5881696939468384, "reward_std": 0.1946403682231903, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 94 }, { "clip_ratio/high_max": 0.0017815181017795112, "clip_ratio/high_mean": 0.0007609570038766833, "clip_ratio/low_mean": 0.0007353126402449561, "clip_ratio/low_min": 5.828756729897577e-05, "clip_ratio/region_mean": 0.0014962696586735547, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3783.0, "completions/mean_length": 690.1484985351562, "completions/mean_terminated_length": 592.3914794921875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 0.8862973760932945, "grad_norm": 0.1269090622663498, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 55188096.0, "reward": 0.527901828289032, "reward_std": 0.22631528973579407, "rewards/verify_math_reward/mean": 0.5279017686843872, "rewards/verify_math_reward/std": 0.49949970841407776, "step": 95 }, { "clip_ratio/high_max": 0.0023371442439383827, "clip_ratio/high_mean": 0.0010629585303831846, "clip_ratio/low_mean": 0.0005052440212693909, "clip_ratio/low_min": 3.7416256418509874e-05, "clip_ratio/region_mean": 0.0015682025332353078, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3382.0, "completions/mean_length": 640.2109375, "completions/mean_terminated_length": 573.3754272460938, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 0.8956268221574344, "grad_norm": 0.12552319467067719, "learning_rate": 1e-06, "loss": 0.0034, "num_tokens": 55775549.0, "reward": 0.5959821939468384, "reward_std": 0.21726585924625397, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 96 }, { "clip_ratio/high_max": 0.0019180375420546625, "clip_ratio/high_mean": 0.0007718974793533562, "clip_ratio/low_mean": 0.0006347041562548839, "clip_ratio/low_min": 2.688036965992069e-05, "clip_ratio/region_mean": 0.0014066016374272294, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2894.0, "completions/mean_length": 617.6864013671875, "completions/mean_terminated_length": 566.4767456054688, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 0.9049562682215744, "grad_norm": 0.1271030753850937, "learning_rate": 1e-06, "loss": 0.0163, "num_tokens": 56367044.0, "reward": 0.5580357313156128, "reward_std": 0.19974736869335175, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 97 }, { "clip_ratio/high_max": 0.0018232932598039042, "clip_ratio/high_mean": 0.0006611393855564529, "clip_ratio/low_mean": 0.0004289146963856183, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010900540910370182, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2921.0, "completions/mean_length": 710.0848388671875, "completions/mean_terminated_length": 620.8797607421875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.9142857142857143, "grad_norm": 0.10877560079097748, "learning_rate": 1e-06, "loss": -0.0082, "num_tokens": 56995000.0, "reward": 0.5223214626312256, "reward_std": 0.18629701435565948, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 98 }, { "clip_ratio/high_max": 0.0019879872706951573, "clip_ratio/high_mean": 0.0008827278543321881, "clip_ratio/low_mean": 0.0005190918327571126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014018196816323325, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 676.8449096679688, "completions/mean_terminated_length": 574.6632080078125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 0.9236151603498542, "grad_norm": 0.12853676080703735, "learning_rate": 1e-06, "loss": 0.0135, "num_tokens": 57594173.0, "reward": 0.494419664144516, "reward_std": 0.20726223289966583, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 99 }, { "clip_ratio/high_max": 0.0018739871666184627, "clip_ratio/high_mean": 0.0007508181433877326, "clip_ratio/low_mean": 0.0004454578192962799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011962759890593588, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3519.0, "completions/mean_length": 662.8683471679688, "completions/mean_terminated_length": 592.4852294921875, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 0.9329446064139941, "grad_norm": 0.11898582428693771, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 58204639.0, "reward": 0.578125, "reward_std": 0.17757542431354523, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 100 }, { "clip_ratio/high_max": 0.0021525089468923397, "clip_ratio/high_mean": 0.0008797519258223474, "clip_ratio/low_mean": 0.0006131259842732106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014928779273759574, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3869.0, "completions/mean_length": 637.5346069335938, "completions/mean_terminated_length": 574.6533813476562, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 0.9422740524781341, "grad_norm": 0.13018183410167694, "learning_rate": 1e-06, "loss": 0.0068, "num_tokens": 58795854.0, "reward": 0.5691964626312256, "reward_std": 0.20992687344551086, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 101 }, { "clip_ratio/high_max": 0.0020191000949125737, "clip_ratio/high_mean": 0.0007565638316009426, "clip_ratio/low_mean": 0.0006582891796824697, "clip_ratio/low_min": 1.2278978829272091e-05, "clip_ratio/region_mean": 0.0014148530244710855, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2296.0, "completions/mean_length": 621.388427734375, "completions/mean_terminated_length": 566.23583984375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 0.9516034985422741, "grad_norm": 0.12451858818531036, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 59395186.0, "reward": 0.559151828289032, "reward_std": 0.19505223631858826, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 102 }, { "clip_ratio/high_max": 0.0018583473174658138, "clip_ratio/high_mean": 0.000765695990594395, "clip_ratio/low_mean": 0.0005216967219894286, "clip_ratio/low_min": 3.714982904057251e-05, "clip_ratio/region_mean": 0.0012873927153123077, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3946.0, "completions/mean_length": 711.3203735351562, "completions/mean_terminated_length": 641.9305419921875, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 0.960932944606414, "grad_norm": 0.11913493275642395, "learning_rate": 1e-06, "loss": 0.0085, "num_tokens": 60046265.0, "reward": 0.5245535969734192, "reward_std": 0.18960891664028168, "rewards/verify_math_reward/mean": 0.5245535969734192, "rewards/verify_math_reward/std": 0.4996756613254547, "step": 103 }, { "clip_ratio/high_max": 0.0019098481170658488, "clip_ratio/high_mean": 0.0007057281472953036, "clip_ratio/low_mean": 0.0007603598660352873, "clip_ratio/low_min": 6.83317712173448e-05, "clip_ratio/region_mean": 0.0014660880333394744, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2183.0, "completions/mean_length": 643.609375, "completions/mean_terminated_length": 552.6529541015625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.970262390670554, "grad_norm": 0.13780678808689117, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 60621675.0, "reward": 0.5736607313156128, "reward_std": 0.20978209376335144, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 104 }, { "clip_ratio/high_max": 0.0018821418343577534, "clip_ratio/high_mean": 0.0007210173389466945, "clip_ratio/low_mean": 0.0006037004368408816, "clip_ratio/low_min": 1.3891975868318696e-05, "clip_ratio/region_mean": 0.0013247177630546503, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 582.6183471679688, "completions/mean_terminated_length": 526.850341796875, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "epoch": 0.9795918367346939, "grad_norm": 0.13550148904323578, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 61174253.0, "reward": 0.5368303656578064, "reward_std": 0.19343574345111847, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 105 }, { "clip_ratio/high_max": 0.0018749735718301963, "clip_ratio/high_mean": 0.0007686431654292392, "clip_ratio/low_mean": 0.0005044213003202458, "clip_ratio/low_min": 1.252254060091218e-05, "clip_ratio/region_mean": 0.0012730644848488737, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2133.0, "completions/mean_length": 607.5402221679688, "completions/mean_terminated_length": 556.1812133789062, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 0.9889212827988338, "grad_norm": 0.11981673538684845, "learning_rate": 1e-06, "loss": 0.0074, "num_tokens": 61747553.0, "reward": 0.5691964626312256, "reward_std": 0.18002675473690033, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 106 }, { "clip_ratio/high_max": 0.002335174933250528, "clip_ratio/high_mean": 0.0008437378983217059, "clip_ratio/low_mean": 0.0006078883779991884, "clip_ratio/low_min": 1.4035481399332639e-05, "clip_ratio/region_mean": 0.0014516263036057353, "completions/clipped_ratio": 0.025568181818181768, "completions/max_length": 4096.0, "completions/max_terminated_length": 2461.0, "completions/mean_length": 594.6704711914062, "completions/mean_terminated_length": 502.7988586425781, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 0.9982507288629737, "grad_norm": 0.13220836222171783, "learning_rate": 1e-06, "loss": 0.0099, "num_tokens": 62317871.0, "reward": 0.5535714626312256, "reward_std": 0.19820643961429596, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973994791507721, "step": 107 }, { "clip_ratio/high_max": 0.0022387665885617025, "clip_ratio/high_mean": 0.000920310674700886, "clip_ratio/low_mean": 0.0006362590484059183, "clip_ratio/low_min": 4.261960566509515e-05, "clip_ratio/region_mean": 0.001556569717649836, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3271.0, "completions/mean_length": 645.7924194335938, "completions/mean_terminated_length": 558.945068359375, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 1.00932944606414, "grad_norm": 0.1348712146282196, "learning_rate": 1e-06, "loss": -0.0101, "num_tokens": 62896813.0, "reward": 0.5714285969734192, "reward_std": 0.22687985002994537, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514806270599365, "step": 108 }, { "clip_ratio/high_max": 0.0021707252963096835, "clip_ratio/high_mean": 0.0009288831206504256, "clip_ratio/low_mean": 0.000522829201145214, "clip_ratio/low_min": 2.4734090402489528e-05, "clip_ratio/region_mean": 0.0014517123272526078, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 619.0145263671875, "completions/mean_terminated_length": 551.76904296875, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 1.01865889212828, "grad_norm": 0.1271272599697113, "learning_rate": 1e-06, "loss": 0.0121, "num_tokens": 63478266.0, "reward": 0.5892857313156128, "reward_std": 0.19823963940143585, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 109 }, { "clip_ratio/high_max": 0.0018047954326902982, "clip_ratio/high_mean": 0.0007129164569050772, "clip_ratio/low_mean": 0.00042467317234695656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011375896174286027, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3977.0, "completions/mean_length": 702.435302734375, "completions/mean_terminated_length": 580.816162109375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.0279883381924197, "grad_norm": 0.12314493954181671, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 64073672.0, "reward": 0.5212053656578064, "reward_std": 0.17389945685863495, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 110 }, { "clip_ratio/high_max": 0.0017495191896159668, "clip_ratio/high_mean": 0.0006691754142593709, "clip_ratio/low_mean": 0.0004947921643179143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001163967597676674, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 647.2176513671875, "completions/mean_terminated_length": 556.3562622070312, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 1.0373177842565597, "grad_norm": 0.11964970827102661, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 64663747.0, "reward": 0.5267857313156128, "reward_std": 0.16743306815624237, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 111 }, { "clip_ratio/high_max": 0.001938575558597222, "clip_ratio/high_mean": 0.0008848465495248092, "clip_ratio/low_mean": 0.0007573944767500507, "clip_ratio/low_min": 5.194687491894001e-05, "clip_ratio/region_mean": 0.0016422410117229447, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3994.0, "completions/mean_length": 610.716552734375, "completions/mean_terminated_length": 543.310546875, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 1.0466472303206997, "grad_norm": 0.13910211622714996, "learning_rate": 1e-06, "loss": 0.0075, "num_tokens": 65235893.0, "reward": 0.5892857313156128, "reward_std": 0.21459950506687164, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 112 }, { "clip_ratio/high_max": 0.0021278136191540398, "clip_ratio/high_mean": 0.0008376358291570796, "clip_ratio/low_mean": 0.0006558591594512109, "clip_ratio/low_min": 4.088420246262103e-05, "clip_ratio/region_mean": 0.0014934949977032375, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2864.0, "completions/mean_length": 625.779052734375, "completions/mean_terminated_length": 554.6355590820312, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 1.0559766763848397, "grad_norm": 0.1188901960849762, "learning_rate": 1e-06, "loss": -0.0093, "num_tokens": 65813495.0, "reward": 0.5714285969734192, "reward_std": 0.1983586996793747, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 113 }, { "clip_ratio/high_max": 0.0018923180177807808, "clip_ratio/high_mean": 0.0007647187485417817, "clip_ratio/low_mean": 0.0006024704180163098, "clip_ratio/low_min": 1.3501836292562075e-05, "clip_ratio/region_mean": 0.001367189186566975, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2139.0, "completions/mean_length": 617.6842041015625, "completions/mean_terminated_length": 550.4129638671875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 1.0653061224489795, "grad_norm": 0.12145751714706421, "learning_rate": 1e-06, "loss": 0.0067, "num_tokens": 66408756.0, "reward": 0.5223214626312256, "reward_std": 0.1786727011203766, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 114 }, { "clip_ratio/high_max": 0.0016835417263791896, "clip_ratio/high_mean": 0.0006536952914757421, "clip_ratio/low_mean": 0.0004384264557302231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010921217544819228, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 629.193115234375, "completions/mean_terminated_length": 554.0855102539062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.0746355685131195, "grad_norm": 0.12573300302028656, "learning_rate": 1e-06, "loss": 0.0053, "num_tokens": 66982601.0, "reward": 0.5580357313156128, "reward_std": 0.17559054493904114, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 115 }, { "clip_ratio/high_max": 0.0017454099142923951, "clip_ratio/high_mean": 0.0007543304909631843, "clip_ratio/low_mean": 0.0005247638746368466, "clip_ratio/low_min": 1.396336028847145e-05, "clip_ratio/region_mean": 0.0012790943510481156, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 612.419677734375, "completions/mean_terminated_length": 569.1209106445312, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 1.0839650145772595, "grad_norm": 0.12264445424079895, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 67574113.0, "reward": 0.6350446939468384, "reward_std": 0.19238336384296417, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 116 }, { "clip_ratio/high_max": 0.001786453249223996, "clip_ratio/high_mean": 0.0007486339327442693, "clip_ratio/low_mean": 0.000482122797620832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012307567230891436, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3431.0, "completions/mean_length": 630.3046875, "completions/mean_terminated_length": 551.17919921875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.0932944606413995, "grad_norm": 0.12435305863618851, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 68142066.0, "reward": 0.5926339626312256, "reward_std": 0.2005397230386734, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 117 }, { "clip_ratio/high_max": 0.0017564252302690875, "clip_ratio/high_mean": 0.0006838038789283019, "clip_ratio/low_mean": 0.00044850763742942945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001132311484980164, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2756.0, "completions/mean_length": 617.4921875, "completions/mean_terminated_length": 554.24658203125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.1026239067055394, "grad_norm": 0.1242511197924614, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 68704155.0, "reward": 0.6328125, "reward_std": 0.16747723519802094, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 118 }, { "clip_ratio/high_max": 0.0017772580322343856, "clip_ratio/high_mean": 0.0007229723705677316, "clip_ratio/low_mean": 0.0005288728652885766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012518452131189406, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 586.9732666015625, "completions/mean_terminated_length": 523.1727294921875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.1119533527696792, "grad_norm": 0.1337558925151825, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 69238171.0, "reward": 0.6428571939468384, "reward_std": 0.1710776388645172, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 119 }, { "clip_ratio/high_max": 0.0015808707248652354, "clip_ratio/high_mean": 0.0006234206184672075, "clip_ratio/low_mean": 0.0005845670289090776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012079876032657921, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 643.427490234375, "completions/mean_terminated_length": 580.6533813476562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 1.1212827988338192, "grad_norm": 0.12093667685985565, "learning_rate": 1e-06, "loss": 0.0084, "num_tokens": 69845786.0, "reward": 0.5703125, "reward_std": 0.19471341371536255, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 120 }, { "clip_ratio/high_max": 0.0017549932235851884, "clip_ratio/high_mean": 0.0006848561370134121, "clip_ratio/low_mean": 0.0006726086385242525, "clip_ratio/low_min": 1.0254307198920287e-05, "clip_ratio/region_mean": 0.001357464770990191, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 686.0089721679688, "completions/mean_terminated_length": 584.1011352539062, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 1.1306122448979592, "grad_norm": 0.12279097735881805, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 70451154.0, "reward": 0.5223214626312256, "reward_std": 0.17611117660999298, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 121 }, { "clip_ratio/high_max": 0.0018810498586390167, "clip_ratio/high_mean": 0.0008115886794257676, "clip_ratio/low_mean": 0.00046586242297053104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012774511160387192, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 605.732177734375, "completions/mean_terminated_length": 546.3065185546875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 1.1399416909620992, "grad_norm": 0.12779517471790314, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 71011018.0, "reward": 0.6160714626312256, "reward_std": 0.1821650117635727, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 122 }, { "clip_ratio/high_max": 0.0017122595381806605, "clip_ratio/high_mean": 0.0006494648532680003, "clip_ratio/low_mean": 0.0006846807918918785, "clip_ratio/low_min": 2.707682506297715e-05, "clip_ratio/region_mean": 0.0013341456651687622, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3643.0, "completions/mean_length": 658.9241333007812, "completions/mean_terminated_length": 588.4601440429688, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 1.149271137026239, "grad_norm": 0.13286446034908295, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 71617774.0, "reward": 0.5814732313156128, "reward_std": 0.21203728020191193, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 123 }, { "clip_ratio/high_max": 0.001713228179141879, "clip_ratio/high_mean": 0.0007118931907825754, "clip_ratio/low_mean": 0.0006066005862521706, "clip_ratio/low_min": 2.9062808607704937e-05, "clip_ratio/region_mean": 0.001318493752478389, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 598.6339721679688, "completions/mean_terminated_length": 547.143798828125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 1.158600583090379, "grad_norm": 0.14108534157276154, "learning_rate": 1e-06, "loss": 0.0061, "num_tokens": 72190414.0, "reward": 0.6049107313156128, "reward_std": 0.19196967780590057, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 124 }, { "clip_ratio/high_max": 0.0012990183022338897, "clip_ratio/high_mean": 0.00048056226751214126, "clip_ratio/low_mean": 0.0003377165571691876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008182788296835497, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3494.0, "completions/mean_length": 720.2489013671875, "completions/mean_terminated_length": 587.0985717773438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 1.167930029154519, "grad_norm": 0.10980133712291718, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 72786837.0, "reward": 0.5033482313156128, "reward_std": 0.13801473379135132, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 125 }, { "clip_ratio/high_max": 0.0019630566603154875, "clip_ratio/high_mean": 0.0007658281065232586, "clip_ratio/low_mean": 0.0004464349394766032, "clip_ratio/low_min": 1.295605306950165e-05, "clip_ratio/region_mean": 0.0012122630214435048, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 604.15625, "completions/mean_terminated_length": 544.7037963867188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 1.177259475218659, "grad_norm": 0.12922635674476624, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 73355201.0, "reward": 0.6127232313156128, "reward_std": 0.17697879672050476, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 126 }, { "clip_ratio/high_max": 0.0018201843595306855, "clip_ratio/high_mean": 0.0006656213408859912, "clip_ratio/low_mean": 0.000634996009466704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013006173285248224, "completions/clipped_ratio": 0.0078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2575.0, "completions/mean_length": 573.984375, "completions/mean_terminated_length": 546.2520141601562, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 1.186588921282799, "grad_norm": 0.12683415412902832, "learning_rate": 1e-06, "loss": 0.0104, "num_tokens": 73923419.0, "reward": 0.629464328289032, "reward_std": 0.1753644049167633, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 127 }, { "clip_ratio/high_max": 0.0018087178468704224, "clip_ratio/high_mean": 0.0008138277462421684, "clip_ratio/low_mean": 0.0005633177388517652, "clip_ratio/low_min": 2.562715144449612e-05, "clip_ratio/region_mean": 0.0013771454723610077, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3380.0, "completions/mean_length": 636.6652221679688, "completions/mean_terminated_length": 561.719482421875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 1.1959183673469387, "grad_norm": 0.13377544283866882, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 74507967.0, "reward": 0.5714285969734192, "reward_std": 0.2176763117313385, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 128 }, { "clip_ratio/high_max": 0.002272482895932626, "clip_ratio/high_mean": 0.0008079380513663637, "clip_ratio/low_mean": 0.0007487964085157728, "clip_ratio/low_min": 3.629790353443241e-05, "clip_ratio/region_mean": 0.0015567344780720305, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2323.0, "completions/mean_length": 637.7879638671875, "completions/mean_terminated_length": 558.8333129882812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.2052478134110787, "grad_norm": 0.14027684926986694, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 75090593.0, "reward": 0.5569196939468384, "reward_std": 0.22007599472999573, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 129 }, { "clip_ratio/high_max": 0.0017475325330451597, "clip_ratio/high_mean": 0.0006926625828782562, "clip_ratio/low_mean": 0.000576557072236028, "clip_ratio/low_min": 4.1899011193891056e-05, "clip_ratio/region_mean": 0.0012692196542047895, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 730.8136596679688, "completions/mean_terminated_length": 594.0173950195312, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.2145772594752187, "grad_norm": 0.13198024034500122, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 75695186.0, "reward": 0.5033482313156128, "reward_std": 0.20410902798175812, "rewards/verify_math_reward/mean": 0.5033482313156128, "rewards/verify_math_reward/std": 0.5002680420875549, "step": 130 }, { "clip_ratio/high_max": 0.0018253561356686987, "clip_ratio/high_mean": 0.0008197991337510757, "clip_ratio/low_mean": 0.0004882724788330961, "clip_ratio/low_min": 3.2800064218463376e-05, "clip_ratio/region_mean": 0.0013080716053082142, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3964.0, "completions/mean_length": 585.388427734375, "completions/mean_terminated_length": 529.6644287109375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.2239067055393587, "grad_norm": 0.14042188227176666, "learning_rate": 1e-06, "loss": 0.0181, "num_tokens": 76247502.0, "reward": 0.625, "reward_std": 0.1896064132452011, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 131 }, { "clip_ratio/high_max": 0.001913049283757573, "clip_ratio/high_mean": 0.0006868303107694373, "clip_ratio/low_mean": 0.00042972276014552335, "clip_ratio/low_min": 1.3957123883301392e-05, "clip_ratio/region_mean": 0.0011165530886501074, "completions/clipped_ratio": 0.012276785714285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 3268.0, "completions/mean_length": 582.15625, "completions/mean_terminated_length": 538.4813842773438, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 1.2332361516034984, "grad_norm": 0.11454752832651138, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 76809554.0, "reward": 0.5915178656578064, "reward_std": 0.16048020124435425, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 132 }, { "clip_ratio/high_max": 0.001901395145978313, "clip_ratio/high_mean": 0.0007389826678263489, "clip_ratio/low_mean": 0.0005679324931406882, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013069151646050159, "completions/clipped_ratio": 0.011160714285714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3139.0, "completions/mean_length": 590.3046875, "completions/mean_terminated_length": 550.737060546875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.2425655976676384, "grad_norm": 0.13019219040870667, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 77388379.0, "reward": 0.5368303656578064, "reward_std": 0.19268646836280823, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 133 }, { "clip_ratio/high_max": 0.0019378843207960017, "clip_ratio/high_mean": 0.0007921883643575711, "clip_ratio/low_mean": 0.0006041999640729045, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013963882774987724, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3501.0, "completions/mean_length": 593.3381958007812, "completions/mean_terminated_length": 545.790771484375, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 1.2518950437317784, "grad_norm": 0.12969744205474854, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 77952122.0, "reward": 0.621651828289032, "reward_std": 0.1883269101381302, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 134 }, { "clip_ratio/high_max": 0.0021529032892431132, "clip_ratio/high_mean": 0.0009231247586285463, "clip_ratio/low_mean": 0.0006282718204602133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001551396617287537, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3528.0, "completions/mean_length": 614.7689819335938, "completions/mean_terminated_length": 527.1406860351562, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.2612244897959184, "grad_norm": 0.3380157947540283, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 78496859.0, "reward": 0.6551339626312256, "reward_std": 0.21185435354709625, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 135 }, { "clip_ratio/high_max": 0.0017971838769881288, "clip_ratio/high_mean": 0.0006118418068581377, "clip_ratio/low_mean": 0.0006130956890046946, "clip_ratio/low_min": 4.8540068746660836e-05, "clip_ratio/region_mean": 0.0012249375104147475, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2587.0, "completions/mean_length": 607.747802734375, "completions/mean_terminated_length": 524.0297241210938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 1.2705539358600584, "grad_norm": 0.12717096507549286, "learning_rate": 1e-06, "loss": -0.0191, "num_tokens": 79046985.0, "reward": 0.5948660969734192, "reward_std": 0.16638249158859253, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 136 }, { "clip_ratio/high_max": 0.002020962674578186, "clip_ratio/high_mean": 0.0007820503360562725, "clip_ratio/low_mean": 0.0005464571549964603, "clip_ratio/low_min": 1.3400514944805764e-05, "clip_ratio/region_mean": 0.0013285074528539553, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3811.0, "completions/mean_length": 644.1551513671875, "completions/mean_terminated_length": 569.3717041015625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.2798833819241984, "grad_norm": 0.12049160152673721, "learning_rate": 1e-06, "loss": 0.0113, "num_tokens": 79632548.0, "reward": 0.5892857313156128, "reward_std": 0.18550649285316467, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 137 }, { "clip_ratio/high_max": 0.0018255292561661918, "clip_ratio/high_mean": 0.0005775526515208185, "clip_ratio/low_mean": 0.0005093980871606618, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010869507277675439, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 636.234375, "completions/mean_terminated_length": 565.3052368164062, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.2892128279883381, "grad_norm": 0.12048103660345078, "learning_rate": 1e-06, "loss": -0.0038, "num_tokens": 80221198.0, "reward": 0.5580357313156128, "reward_std": 0.16288693249225616, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 138 }, { "clip_ratio/high_max": 0.002007623614190379, "clip_ratio/high_mean": 0.0007619925354447332, "clip_ratio/low_mean": 0.0006293182996159885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013913108559790999, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3092.0, "completions/mean_length": 677.138427734375, "completions/mean_terminated_length": 570.9136962890625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 1.2985422740524781, "grad_norm": 0.12649589776992798, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 80813106.0, "reward": 0.520089328289032, "reward_std": 0.19456186890602112, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 139 }, { "clip_ratio/high_max": 0.0020343236610642634, "clip_ratio/high_mean": 0.0007822143088560551, "clip_ratio/low_mean": 0.0005923915932726231, "clip_ratio/low_min": 1.6099947970360518e-05, "clip_ratio/region_mean": 0.0013746059048571624, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3701.0, "completions/mean_length": 698.8170166015625, "completions/mean_terminated_length": 589.2304077148438, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 1.3078717201166181, "grad_norm": 0.1343315690755844, "learning_rate": 1e-06, "loss": -0.0112, "num_tokens": 81417614.0, "reward": 0.520089328289032, "reward_std": 0.20493286848068237, "rewards/verify_math_reward/mean": 0.5200892686843872, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 140 }, { "clip_ratio/high_max": 0.0015934638831822667, "clip_ratio/high_mean": 0.0005796222703793319, "clip_ratio/low_mean": 0.0005065776958872448, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010861999762710184, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3603.0, "completions/mean_length": 671.3125, "completions/mean_terminated_length": 577.0549926757812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.3172011661807579, "grad_norm": 0.11626088619232178, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 82014734.0, "reward": 0.5133928656578064, "reward_std": 0.16897399723529816, "rewards/verify_math_reward/mean": 0.5133928656578064, "rewards/verify_math_reward/std": 0.500099778175354, "step": 141 }, { "clip_ratio/high_max": 0.0015611184171575587, "clip_ratio/high_mean": 0.0006733137779519893, "clip_ratio/low_mean": 0.0006984407709751395, "clip_ratio/low_min": 5.065845380158862e-05, "clip_ratio/region_mean": 0.0013717545552935917, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2779.0, "completions/mean_length": 690.3348388671875, "completions/mean_terminated_length": 592.583251953125, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 1.3265306122448979, "grad_norm": 0.1328561007976532, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 82624282.0, "reward": 0.5446428656578064, "reward_std": 0.2069302648305893, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 142 }, { "clip_ratio/high_max": 0.002104275714373216, "clip_ratio/high_mean": 0.0007949095033836784, "clip_ratio/low_mean": 0.0006041574306436814, "clip_ratio/low_min": 1.7433751054340973e-05, "clip_ratio/region_mean": 0.0013990669249324128, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3350.0, "completions/mean_length": 663.1194458007812, "completions/mean_terminated_length": 560.527587890625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.3358600583090379, "grad_norm": 0.14331470429897308, "learning_rate": 1e-06, "loss": -0.0046, "num_tokens": 83203509.0, "reward": 0.5212053656578064, "reward_std": 0.18919385969638824, "rewards/verify_math_reward/mean": 0.5212053656578064, "rewards/verify_math_reward/std": 0.49982914328575134, "step": 143 }, { "clip_ratio/high_max": 0.0015646205501980148, "clip_ratio/high_mean": 0.0006571413559868233, "clip_ratio/low_mean": 0.000618994738033507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001276136092201341, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3337.0, "completions/mean_length": 686.2879638671875, "completions/mean_terminated_length": 608.4406127929688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.3451895043731779, "grad_norm": 0.12529589235782623, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 83832719.0, "reward": 0.546875, "reward_std": 0.19839440286159515, "rewards/verify_math_reward/mean": 0.546875, "rewards/verify_math_reward/std": 0.4980759024620056, "step": 144 }, { "clip_ratio/high_max": 0.0019495428350637667, "clip_ratio/high_mean": 0.000920495573154767, "clip_ratio/low_mean": 0.0005835863939864794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015040819671412464, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2413.0, "completions/mean_length": 619.0346069335938, "completions/mean_terminated_length": 523.3382568359375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 1.3545189504373178, "grad_norm": 0.14971975982189178, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 84375726.0, "reward": 0.6383928656578064, "reward_std": 0.1888914555311203, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 145 }, { "clip_ratio/high_max": 0.0017173868873214815, "clip_ratio/high_mean": 0.0007454751685145311, "clip_ratio/low_mean": 0.0006423831055144547, "clip_ratio/low_min": 2.6167050236836076e-05, "clip_ratio/region_mean": 0.0013878582940378692, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3075.0, "completions/mean_length": 675.0267944335938, "completions/mean_terminated_length": 580.8715209960938, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.3638483965014578, "grad_norm": 0.13224931061267853, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 84985630.0, "reward": 0.5725446939468384, "reward_std": 0.1929485946893692, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 146 }, { "clip_ratio/high_max": 0.002161765700293472, "clip_ratio/high_mean": 0.0007993987874215236, "clip_ratio/low_mean": 0.00048263823282468366, "clip_ratio/low_min": 9.279880941903684e-06, "clip_ratio/region_mean": 0.0012820370502595324, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 669.5881958007812, "completions/mean_terminated_length": 587.3543090820312, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 1.3731778425655976, "grad_norm": 0.13312533497810364, "learning_rate": 1e-06, "loss": 0.0146, "num_tokens": 85587173.0, "reward": 0.5546875, "reward_std": 0.1856580376625061, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 147 }, { "clip_ratio/high_max": 0.0020616715628420934, "clip_ratio/high_mean": 0.0009268377507396508, "clip_ratio/low_mean": 0.0005328221686795587, "clip_ratio/low_min": 1.1036552677978761e-05, "clip_ratio/region_mean": 0.0014596599612559658, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 718.458740234375, "completions/mean_terminated_length": 593.3645629882812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.3825072886297376, "grad_norm": 0.13047988712787628, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 86183288.0, "reward": 0.574776828289032, "reward_std": 0.21199268102645874, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 148 }, { "clip_ratio/high_max": 0.0016880180082807783, "clip_ratio/high_mean": 0.0005727906318497844, "clip_ratio/low_mean": 0.000599437033997674, "clip_ratio/low_min": 1.3205155482864939e-05, "clip_ratio/region_mean": 0.0011722276831278577, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 607.1004638671875, "completions/mean_terminated_length": 543.6658935546875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.3918367346938776, "grad_norm": 0.13434885442256927, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 86746338.0, "reward": 0.6037946939468384, "reward_std": 0.18118859827518463, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 149 }, { "clip_ratio/high_max": 0.0016494169176439755, "clip_ratio/high_mean": 0.0005675286411133129, "clip_ratio/low_mean": 0.0005270805522741284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010946092043013778, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3588.0, "completions/mean_length": 652.505615234375, "completions/mean_terminated_length": 569.8616943359375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 1.4011661807580174, "grad_norm": 0.11531613022089005, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 87341223.0, "reward": 0.6183035969734192, "reward_std": 0.15431898832321167, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 150 }, { "clip_ratio/high_max": 0.0018470843424438499, "clip_ratio/high_mean": 0.0006419708988687489, "clip_ratio/low_mean": 0.0006261602720769588, "clip_ratio/low_min": 5.929211329203099e-05, "clip_ratio/region_mean": 0.0012681312073254958, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3478.0, "completions/mean_length": 686.271240234375, "completions/mean_terminated_length": 576.2799682617188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.4104956268221573, "grad_norm": 0.1235036551952362, "learning_rate": 1e-06, "loss": 0.0208, "num_tokens": 87938634.0, "reward": 0.5580357313156128, "reward_std": 0.1676594614982605, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 151 }, { "clip_ratio/high_max": 0.0019267288444098085, "clip_ratio/high_mean": 0.0007970599690452218, "clip_ratio/low_mean": 0.0005695634208677802, "clip_ratio/low_min": 3.3647374948486686e-05, "clip_ratio/region_mean": 0.0013666233644471504, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3854.0, "completions/mean_length": 608.6015625, "completions/mean_terminated_length": 553.2460327148438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.4198250728862973, "grad_norm": 0.13221563398838043, "learning_rate": 1e-06, "loss": 0.0088, "num_tokens": 88511077.0, "reward": 0.6417410969734192, "reward_std": 0.19846788048744202, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 152 }, { "clip_ratio/high_max": 0.001906688241433585, "clip_ratio/high_mean": 0.0006397882425517309, "clip_ratio/low_mean": 0.00047918893233145354, "clip_ratio/low_min": 1.8027112673735246e-05, "clip_ratio/region_mean": 0.0011189771830686368, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3324.0, "completions/mean_length": 645.466552734375, "completions/mean_terminated_length": 550.4976806640625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.4291545189504373, "grad_norm": 0.1337098330259323, "learning_rate": 1e-06, "loss": 0.0115, "num_tokens": 89080335.0, "reward": 0.5680803656578064, "reward_std": 0.18040582537651062, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200122833252, "step": 153 }, { "clip_ratio/high_max": 0.0015368815656984225, "clip_ratio/high_mean": 0.0005800444414489903, "clip_ratio/low_mean": 0.0005559555738727795, "clip_ratio/low_min": 1.67291218531318e-05, "clip_ratio/region_mean": 0.0011360000135027803, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3132.0, "completions/mean_length": 623.3158569335938, "completions/mean_terminated_length": 552.1218872070312, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.4384839650145773, "grad_norm": 0.12626437842845917, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 89647626.0, "reward": 0.6127232313156128, "reward_std": 0.16555652022361755, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 154 }, { "clip_ratio/high_max": 0.001931624261487741, "clip_ratio/high_mean": 0.0007596589457534719, "clip_ratio/low_mean": 0.0003546154935065715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011142744442622643, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 590.685302734375, "completions/mean_terminated_length": 522.8919067382812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 1.4478134110787173, "grad_norm": 0.12856322526931763, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 90191576.0, "reward": 0.6517857313156128, "reward_std": 0.17574280500411987, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667038440704346, "step": 155 }, { "clip_ratio/high_max": 0.00201725831720978, "clip_ratio/high_mean": 0.0007344376808759989, "clip_ratio/low_mean": 0.0006907381075507146, "clip_ratio/low_min": 1.090560090233339e-05, "clip_ratio/region_mean": 0.0014251758184400387, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3844.0, "completions/mean_length": 594.6752319335938, "completions/mean_terminated_length": 535.0613403320312, "completions/min_length": 198.0, "completions/min_terminated_length": 198.0, "epoch": 1.457142857142857, "grad_norm": 0.13053034245967865, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 90752269.0, "reward": 0.625, "reward_std": 0.18832510709762573, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 156 }, { "clip_ratio/high_max": 0.0017435079353163019, "clip_ratio/high_mean": 0.0007276815913428436, "clip_ratio/low_mean": 0.0005316176857377286, "clip_ratio/low_min": 1.160846932179993e-05, "clip_ratio/region_mean": 0.0012592993043654133, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3797.0, "completions/mean_length": 704.2031860351562, "completions/mean_terminated_length": 610.8508911132812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.466472303206997, "grad_norm": 0.13034747540950775, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 91376435.0, "reward": 0.5412946939468384, "reward_std": 0.2033594399690628, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 157 }, { "clip_ratio/high_max": 0.0021951078888378106, "clip_ratio/high_mean": 0.0008293294667964801, "clip_ratio/low_mean": 0.0006260247982936562, "clip_ratio/low_min": 2.0167795810266398e-05, "clip_ratio/region_mean": 0.0014553542496287264, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 678.5111694335938, "completions/mean_terminated_length": 592.4873657226562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 1.475801749271137, "grad_norm": 0.13196291029453278, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 91985125.0, "reward": 0.5703125, "reward_std": 0.18870559334754944, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 158 }, { "clip_ratio/high_max": 0.001829417687986279, "clip_ratio/high_mean": 0.000784383266363875, "clip_ratio/low_mean": 0.0004983877370250411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001282771016121842, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3707.0, "completions/mean_length": 682.1328125, "completions/mean_terminated_length": 600.2000122070312, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 1.485131195335277, "grad_norm": 0.1347026824951172, "learning_rate": 1e-06, "loss": -0.0104, "num_tokens": 92588844.0, "reward": 0.59375, "reward_std": 0.20260171592235565, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 159 }, { "clip_ratio/high_max": 0.0017668598411546554, "clip_ratio/high_mean": 0.000813364764326252, "clip_ratio/low_mean": 0.0005534497195185395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013668144310940988, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2744.0, "completions/mean_length": 656.771240234375, "completions/mean_terminated_length": 570.2001953125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.4944606413994168, "grad_norm": 0.13396763801574707, "learning_rate": 1e-06, "loss": -0.0139, "num_tokens": 93177303.0, "reward": 0.6127232313156128, "reward_std": 0.20827394723892212, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 160 }, { "clip_ratio/high_max": 0.0019355226613697596, "clip_ratio/high_mean": 0.0007956197114253882, "clip_ratio/low_mean": 0.0005516102828551084, "clip_ratio/low_min": 5.0502947487984784e-05, "clip_ratio/region_mean": 0.001347230005194433, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2103.0, "completions/mean_length": 639.8471069335938, "completions/mean_terminated_length": 573.0045166015625, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 1.5037900874635568, "grad_norm": 0.1301068812608719, "learning_rate": 1e-06, "loss": 0.0099, "num_tokens": 93763366.0, "reward": 0.6071428656578064, "reward_std": 0.1840411126613617, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865824937820435, "step": 161 }, { "clip_ratio/high_max": 0.001923288309626514, "clip_ratio/high_mean": 0.000747525396036508, "clip_ratio/low_mean": 0.0004745077794723329, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012220331627759151, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3448.0, "completions/mean_length": 704.0569458007812, "completions/mean_terminated_length": 602.6884765625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 1.5131195335276968, "grad_norm": 0.1191791296005249, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 94383145.0, "reward": 0.5368303656578064, "reward_std": 0.197902649641037, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 162 }, { "clip_ratio/high_max": 0.0023761102493153885, "clip_ratio/high_mean": 0.00098472046374809, "clip_ratio/low_mean": 0.00046745350573473843, "clip_ratio/low_min": 1.3658216630574316e-05, "clip_ratio/region_mean": 0.0014521739576593973, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1893.0, "completions/mean_length": 610.7467041015625, "completions/mean_terminated_length": 555.4251708984375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 1.5224489795918368, "grad_norm": 0.15298490226268768, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 94974646.0, "reward": 0.543526828289032, "reward_std": 0.22872062027454376, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 163 }, { "clip_ratio/high_max": 0.0015781065812916495, "clip_ratio/high_mean": 0.0004595067603077041, "clip_ratio/low_mean": 0.000494794807764265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009543015621602535, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 620.7455444335938, "completions/mean_terminated_length": 525.0963134765625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 1.5317784256559768, "grad_norm": 0.10810443013906479, "learning_rate": 1e-06, "loss": -0.003, "num_tokens": 95526018.0, "reward": 0.59375, "reward_std": 0.12805740535259247, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 164 }, { "clip_ratio/high_max": 0.0017954709655896295, "clip_ratio/high_mean": 0.0007386102424788987, "clip_ratio/low_mean": 0.0005059665991211659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012445768479665276, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3908.0, "completions/mean_length": 688.3214721679688, "completions/mean_terminated_length": 602.5446166992188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 1.5411078717201168, "grad_norm": 0.1268397867679596, "learning_rate": 1e-06, "loss": -0.0215, "num_tokens": 96144530.0, "reward": 0.5602678656578064, "reward_std": 0.18336710333824158, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 165 }, { "clip_ratio/high_max": 0.0020396360268932767, "clip_ratio/high_mean": 0.0006896339455124689, "clip_ratio/low_mean": 0.0006078881506255129, "clip_ratio/low_min": 1.302083364862483e-05, "clip_ratio/region_mean": 0.0012975220706721302, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2533.0, "completions/mean_length": 587.96875, "completions/mean_terminated_length": 516.0501098632812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.5504373177842565, "grad_norm": 0.13565339148044586, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 96691182.0, "reward": 0.5993303656578064, "reward_std": 0.16999132931232452, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 166 }, { "clip_ratio/high_max": 0.001906873680127319, "clip_ratio/high_mean": 0.0008471759992971784, "clip_ratio/low_mean": 0.0005641269253828796, "clip_ratio/low_min": 2.4888242478482425e-05, "clip_ratio/region_mean": 0.001411302902852185, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3258.0, "completions/mean_length": 644.575927734375, "completions/mean_terminated_length": 553.6449584960938, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.5597667638483965, "grad_norm": 0.14925526082515717, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 97257418.0, "reward": 0.6082589626312256, "reward_std": 0.22755201160907745, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 167 }, { "clip_ratio/high_max": 0.0015691809821873903, "clip_ratio/high_mean": 0.0006319213207461871, "clip_ratio/low_mean": 0.0004532409575404017, "clip_ratio/low_min": 1.1886649190273602e-05, "clip_ratio/region_mean": 0.0010851622682821471, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3579.0, "completions/mean_length": 680.5089721679688, "completions/mean_terminated_length": 606.5131225585938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.5690962099125363, "grad_norm": 0.12171467393636703, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 97873674.0, "reward": 0.5636160969734192, "reward_std": 0.17487628757953644, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 168 }, { "clip_ratio/high_max": 0.0019097938602499198, "clip_ratio/high_mean": 0.0006869558783364482, "clip_ratio/low_mean": 0.0005390265587266185, "clip_ratio/low_min": 1.3266821952129249e-05, "clip_ratio/region_mean": 0.001225982417963678, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 632.9319458007812, "completions/mean_terminated_length": 553.8663940429688, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.5784256559766763, "grad_norm": 0.14499714970588684, "learning_rate": 1e-06, "loss": 0.0114, "num_tokens": 98446661.0, "reward": 0.578125, "reward_std": 0.1948240101337433, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 169 }, { "clip_ratio/high_max": 0.0016660082401358522, "clip_ratio/high_mean": 0.000690355134793208, "clip_ratio/low_mean": 0.0005471073127409909, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001237462452991167, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 651.825927734375, "completions/mean_terminated_length": 581.2164306640625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 1.5877551020408163, "grad_norm": 0.13439850509166718, "learning_rate": 1e-06, "loss": 0.0111, "num_tokens": 99046137.0, "reward": 0.5959821939468384, "reward_std": 0.1863390952348709, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 170 }, { "clip_ratio/high_max": 0.0022043542776373215, "clip_ratio/high_mean": 0.000894695227543707, "clip_ratio/low_mean": 0.0007906444052423467, "clip_ratio/low_min": 0.00010231894702883437, "clip_ratio/region_mean": 0.0016853396155056544, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 648.1439819335938, "completions/mean_terminated_length": 573.4469604492188, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.5970845481049563, "grad_norm": 0.14859704673290253, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 99640138.0, "reward": 0.5848214626312256, "reward_std": 0.22000113129615784, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 171 }, { "clip_ratio/high_max": 0.0019844864873448387, "clip_ratio/high_mean": 0.0009608767031750176, "clip_ratio/low_mean": 0.0005933990096309572, "clip_ratio/low_min": 7.017723510216456e-05, "clip_ratio/region_mean": 0.0015542756736977026, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 684.0424194335938, "completions/mean_terminated_length": 594.1512451171875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.6064139941690962, "grad_norm": 0.14803965389728546, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 100250584.0, "reward": 0.606026828289032, "reward_std": 0.23868004977703094, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890191316604614, "step": 172 }, { "clip_ratio/high_max": 0.001996364320802968, "clip_ratio/high_mean": 0.0008315825980389491, "clip_ratio/low_mean": 0.00047165330761345103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001303235887462506, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3660.0, "completions/mean_length": 670.2076416015625, "completions/mean_terminated_length": 551.5311889648438, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 1.6157434402332362, "grad_norm": 0.14180468022823334, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 100808658.0, "reward": 0.6127232313156128, "reward_std": 0.19099578261375427, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 173 }, { "clip_ratio/high_max": 0.0015758375484438147, "clip_ratio/high_mean": 0.0005748821095039602, "clip_ratio/low_mean": 0.0006755555532436119, "clip_ratio/low_min": 3.074549567827489e-05, "clip_ratio/region_mean": 0.0012504376863944344, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3215.0, "completions/mean_length": 641.6752319335938, "completions/mean_terminated_length": 550.6678466796875, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 1.6250728862973762, "grad_norm": 0.1388065218925476, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 101379703.0, "reward": 0.5647321939468384, "reward_std": 0.19498512148857117, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 174 }, { "clip_ratio/high_max": 0.0017157134316221345, "clip_ratio/high_mean": 0.0006739685177308274, "clip_ratio/low_mean": 0.00044784355395677267, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011218121253477875, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3381.0, "completions/mean_length": 663.0892944335938, "completions/mean_terminated_length": 576.6773071289062, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 1.634402332361516, "grad_norm": 0.12144182622432709, "learning_rate": 1e-06, "loss": 0.0137, "num_tokens": 101992087.0, "reward": 0.5714285969734192, "reward_std": 0.18536199629306793, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 175 }, { "clip_ratio/high_max": 0.0019562089692044538, "clip_ratio/high_mean": 0.000716243475835654, "clip_ratio/low_mean": 0.0006760624455637299, "clip_ratio/low_min": 3.2546440706937574e-05, "clip_ratio/region_mean": 0.0013923059159424156, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3865.0, "completions/mean_length": 667.140625, "completions/mean_terminated_length": 596.8451538085938, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 1.643731778425656, "grad_norm": 0.14351120591163635, "learning_rate": 1e-06, "loss": 0.02, "num_tokens": 102606421.0, "reward": 0.5959821939468384, "reward_std": 0.22338497638702393, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 176 }, { "clip_ratio/high_max": 0.0014324550465971697, "clip_ratio/high_mean": 0.0005830946192872943, "clip_ratio/low_mean": 0.00042451115496078273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010076057878904976, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1928.0, "completions/mean_length": 662.0335083007812, "completions/mean_terminated_length": 571.5624389648438, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.6530612244897958, "grad_norm": 0.1510154753923416, "learning_rate": 1e-06, "loss": -0.0063, "num_tokens": 103196011.0, "reward": 0.559151828289032, "reward_std": 0.15965604782104492, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 177 }, { "clip_ratio/high_max": 0.0016313164414896164, "clip_ratio/high_mean": 0.0006895035476190969, "clip_ratio/low_mean": 0.00042845612392738985, "clip_ratio/low_min": 5.960425278317416e-05, "clip_ratio/region_mean": 0.0011179596513102297, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 668.107177734375, "completions/mean_terminated_length": 557.5299682617188, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.6623906705539357, "grad_norm": 0.13701440393924713, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 103763403.0, "reward": 0.6272321939468384, "reward_std": 0.18814215064048767, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 178 }, { "clip_ratio/high_max": 0.0016562871787755284, "clip_ratio/high_mean": 0.0006508016022053198, "clip_ratio/low_mean": 0.0005484965413415921, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011992981671937741, "completions/clipped_ratio": 0.014508928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 604.0223388671875, "completions/mean_terminated_length": 552.6115112304688, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 1.6717201166180757, "grad_norm": 0.13368184864521027, "learning_rate": 1e-06, "loss": 0.0106, "num_tokens": 104333543.0, "reward": 0.6037946939468384, "reward_std": 0.18080882728099823, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 179 }, { "clip_ratio/high_max": 0.0015244390233419836, "clip_ratio/high_mean": 0.0006030370268490515, "clip_ratio/low_mean": 0.0004817474000446964, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010847844405361684, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3576.0, "completions/mean_length": 622.0826416015625, "completions/mean_terminated_length": 534.638427734375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.6810495626822157, "grad_norm": 0.13400602340698242, "learning_rate": 1e-06, "loss": -0.0059, "num_tokens": 104887721.0, "reward": 0.5859375, "reward_std": 0.1712280660867691, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 180 }, { "clip_ratio/high_max": 0.0018641551541804802, "clip_ratio/high_mean": 0.0006986685366427992, "clip_ratio/low_mean": 0.0005265117324597668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012251802727405448, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2528.0, "completions/mean_length": 634.6361694335938, "completions/mean_terminated_length": 551.5634155273438, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 1.6903790087463557, "grad_norm": 0.14882251620292664, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 105453187.0, "reward": 0.6339285969734192, "reward_std": 0.17780296504497528, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 181 }, { "clip_ratio/high_max": 0.0018991185024788138, "clip_ratio/high_mean": 0.0006759333355148556, "clip_ratio/low_mean": 0.0006143502687336877, "clip_ratio/low_min": 9.475439583184198e-06, "clip_ratio/region_mean": 0.0012902836278954055, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 758.9174194335938, "completions/mean_terminated_length": 631.3117065429688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 1.6997084548104957, "grad_norm": 0.1366676241159439, "learning_rate": 1e-06, "loss": 0.0184, "num_tokens": 106102513.0, "reward": 0.512276828289032, "reward_std": 0.18652385473251343, "rewards/verify_math_reward/mean": 0.5122767686843872, "rewards/verify_math_reward/std": 0.500128448009491, "step": 182 }, { "clip_ratio/high_max": 0.0019408968910283875, "clip_ratio/high_mean": 0.0007383292740996694, "clip_ratio/low_mean": 0.0005783888327641762, "clip_ratio/low_min": 1.0807539183588233e-05, "clip_ratio/region_mean": 0.001316718124144245, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3244.0, "completions/mean_length": 581.9732666015625, "completions/mean_terminated_length": 509.9316711425781, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 1.7090379008746357, "grad_norm": 0.1475510448217392, "learning_rate": 1e-06, "loss": 0.0112, "num_tokens": 106646009.0, "reward": 0.6439732313156128, "reward_std": 0.19053933024406433, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 183 }, { "clip_ratio/high_max": 0.002301610446011182, "clip_ratio/high_mean": 0.0007948559814394685, "clip_ratio/low_mean": 0.0005266468115223688, "clip_ratio/low_min": 1.1428049219830427e-05, "clip_ratio/region_mean": 0.0013215027829573955, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 694.8717041015625, "completions/mean_terminated_length": 605.2658081054688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 1.7183673469387755, "grad_norm": 0.1307675987482071, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 107267254.0, "reward": 0.5725446939468384, "reward_std": 0.17975644767284393, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 184 }, { "clip_ratio/high_max": 0.0016229440116148908, "clip_ratio/high_mean": 0.0005987291697238106, "clip_ratio/low_mean": 0.0004522435929175117, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00105097275445587, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 699.0636596679688, "completions/mean_terminated_length": 589.4850463867188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 1.7276967930029155, "grad_norm": 0.1379019021987915, "learning_rate": 1e-06, "loss": 0.0122, "num_tokens": 107864711.0, "reward": 0.5491071939468384, "reward_std": 0.17472361028194427, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 185 }, { "clip_ratio/high_max": 0.0015986760408850387, "clip_ratio/high_mean": 0.0006530470236612018, "clip_ratio/low_mean": 0.0005655967963775765, "clip_ratio/low_min": 2.2465852453024127e-05, "clip_ratio/region_mean": 0.0012186437888885848, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 660.578125, "completions/mean_terminated_length": 578.1279907226562, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.7370262390670554, "grad_norm": 0.13076432049274445, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 108462421.0, "reward": 0.5066964626312256, "reward_std": 0.19260655343532562, "rewards/verify_math_reward/mean": 0.5066964030265808, "rewards/verify_math_reward/std": 0.5002344250679016, "step": 186 }, { "clip_ratio/high_max": 0.001723245884932112, "clip_ratio/high_mean": 0.0006323671332211234, "clip_ratio/low_mean": 0.0005192197158976342, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011515868463902734, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2299.0, "completions/mean_length": 605.864990234375, "completions/mean_terminated_length": 530.251953125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.7463556851311952, "grad_norm": 0.13046713173389435, "learning_rate": 1e-06, "loss": -0.012, "num_tokens": 109015196.0, "reward": 0.6540178656578064, "reward_std": 0.15146467089653015, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 187 }, { "clip_ratio/high_max": 0.0014721749957971042, "clip_ratio/high_mean": 0.0006529896340907726, "clip_ratio/low_mean": 0.0005742745020143047, "clip_ratio/low_min": 2.578914791229181e-05, "clip_ratio/region_mean": 0.0012272641288291197, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2710.0, "completions/mean_length": 713.8125610351562, "completions/mean_terminated_length": 608.727294921875, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 1.7556851311953352, "grad_norm": 0.13606221973896027, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 109635196.0, "reward": 0.5636160969734192, "reward_std": 0.18332546949386597, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 188 }, { "clip_ratio/high_max": 0.0022245493601076305, "clip_ratio/high_mean": 0.0008970431299530901, "clip_ratio/low_mean": 0.0006182360411912668, "clip_ratio/low_min": 1.0991910130542237e-05, "clip_ratio/region_mean": 0.0015152791602304205, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3470.0, "completions/mean_length": 647.6417846679688, "completions/mean_terminated_length": 576.9464721679688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 1.7650145772594752, "grad_norm": 0.15010227262973785, "learning_rate": 1e-06, "loss": 0.0162, "num_tokens": 110238443.0, "reward": 0.59375, "reward_std": 0.19418713450431824, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 189 }, { "clip_ratio/high_max": 0.001977579278900521, "clip_ratio/high_mean": 0.0007247905523399822, "clip_ratio/low_mean": 0.0005474706013046671, "clip_ratio/low_min": 2.3674318981647957e-05, "clip_ratio/region_mean": 0.0012722611572826281, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 741.0201416015625, "completions/mean_terminated_length": 616.7615966796875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 1.7743440233236152, "grad_norm": 0.12722791731357574, "learning_rate": 1e-06, "loss": -0.0104, "num_tokens": 110856109.0, "reward": 0.5870535969734192, "reward_std": 0.19825245440006256, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 190 }, { "clip_ratio/high_max": 0.0017347467874060385, "clip_ratio/high_mean": 0.0006408966291928664, "clip_ratio/low_mean": 0.000596689736994449, "clip_ratio/low_min": 4.890091440756805e-05, "clip_ratio/region_mean": 0.0012375863698252942, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2890.0, "completions/mean_length": 638.685302734375, "completions/mean_terminated_length": 539.451171875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 1.7836734693877552, "grad_norm": 0.13227508962154388, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 111416323.0, "reward": 0.6272321939468384, "reward_std": 0.15597616136074066, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 191 }, { "clip_ratio/high_max": 0.0022329735547828022, "clip_ratio/high_mean": 0.0008203545858123107, "clip_ratio/low_mean": 0.0005361180983527447, "clip_ratio/low_min": 1.538272226753179e-05, "clip_ratio/region_mean": 0.0013564726687036455, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2336.0, "completions/mean_length": 650.591552734375, "completions/mean_terminated_length": 547.6253051757812, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 1.7930029154518952, "grad_norm": 0.14059096574783325, "learning_rate": 1e-06, "loss": -0.0052, "num_tokens": 111976405.0, "reward": 0.5870535969734192, "reward_std": 0.1956181675195694, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 192 }, { "clip_ratio/high_max": 0.0020801638493139762, "clip_ratio/high_mean": 0.0007264574742293917, "clip_ratio/low_mean": 0.0004943578360325773, "clip_ratio/low_min": 2.9367339266173076e-05, "clip_ratio/region_mean": 0.0012208152911625803, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 735.6205444335938, "completions/mean_terminated_length": 615.1907348632812, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 1.802332361516035, "grad_norm": 0.12742440402507782, "learning_rate": 1e-06, "loss": -0.0054, "num_tokens": 112601041.0, "reward": 0.5412946939468384, "reward_std": 0.16578517854213715, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 193 }, { "clip_ratio/high_max": 0.0020587184917530976, "clip_ratio/high_mean": 0.0008120808706735261, "clip_ratio/low_mean": 0.0005044134632044006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013164943629817571, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2822.0, "completions/mean_length": 635.114990234375, "completions/mean_terminated_length": 527.5845947265625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 1.811661807580175, "grad_norm": 0.13608218729496002, "learning_rate": 1e-06, "loss": -0.0228, "num_tokens": 113144968.0, "reward": 0.613839328289032, "reward_std": 0.17926861345767975, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 194 }, { "clip_ratio/high_max": 0.0019229853060096502, "clip_ratio/high_mean": 0.0007687071210966678, "clip_ratio/low_mean": 0.0005006855881219963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001269392749236431, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3543.0, "completions/mean_length": 658.7176513671875, "completions/mean_terminated_length": 555.9942626953125, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 1.820991253644315, "grad_norm": 0.1397802084684372, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 113713067.0, "reward": 0.5848214626312256, "reward_std": 0.1881396323442459, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 195 }, { "clip_ratio/high_max": 0.0014796245559409726, "clip_ratio/high_mean": 0.0005814004280182417, "clip_ratio/low_mean": 0.0004082226628270291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009896231149468804, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3957.0, "completions/mean_length": 698.755615234375, "completions/mean_terminated_length": 585.1222534179688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 1.8303206997084547, "grad_norm": 0.11784183979034424, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 114303240.0, "reward": 0.637276828289032, "reward_std": 0.14492550492286682, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 196 }, { "clip_ratio/high_max": 0.001966052717762068, "clip_ratio/high_mean": 0.0006731036294240766, "clip_ratio/low_mean": 0.0005578692271228647, "clip_ratio/low_min": 1.4256386748456862e-05, "clip_ratio/region_mean": 0.001230972873599967, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 730.2377319335938, "completions/mean_terminated_length": 625.662841796875, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 1.8396501457725947, "grad_norm": 0.14265641570091248, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 114947749.0, "reward": 0.4654017984867096, "reward_std": 0.2022992968559265, "rewards/verify_math_reward/mean": 0.4654017984867096, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 197 }, { "clip_ratio/high_max": 0.0017364472514600493, "clip_ratio/high_mean": 0.0004902536475128727, "clip_ratio/low_mean": 0.0005718366019209498, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010620902430673596, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3200.0, "completions/mean_length": 669.8772583007812, "completions/mean_terminated_length": 559.3571166992188, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 1.8489795918367347, "grad_norm": 0.1364136040210724, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 115528807.0, "reward": 0.5089285969734192, "reward_std": 0.15800705552101135, "rewards/verify_math_reward/mean": 0.5089285969734192, "rewards/verify_math_reward/std": 0.5001994967460632, "step": 198 }, { "clip_ratio/high_max": 0.002096293719660025, "clip_ratio/high_mean": 0.0007575220151920803, "clip_ratio/low_mean": 0.0005292972218740033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001286819257074967, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3367.0, "completions/mean_length": 654.5960083007812, "completions/mean_terminated_length": 539.4855346679688, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 1.8583090379008746, "grad_norm": 0.14795687794685364, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 116077109.0, "reward": 0.590401828289032, "reward_std": 0.17405030131340027, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 199 }, { "clip_ratio/high_max": 0.0022372160965460353, "clip_ratio/high_mean": 0.0008441779791610315, "clip_ratio/low_mean": 0.00047425334059880697, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013184313211240806, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 693.1585083007812, "completions/mean_terminated_length": 587.4315185546875, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 1.8676384839650146, "grad_norm": 0.1414903700351715, "learning_rate": 1e-06, "loss": -0.0079, "num_tokens": 116678979.0, "reward": 0.645089328289032, "reward_std": 0.18137337267398834, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 200 }, { "clip_ratio/high_max": 0.002172154694562778, "clip_ratio/high_mean": 0.0008787740134721389, "clip_ratio/low_mean": 0.0006796053594371188, "clip_ratio/low_min": 6.416153973987093e-05, "clip_ratio/region_mean": 0.001558379353809869, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2580.0, "completions/mean_length": 621.599365234375, "completions/mean_terminated_length": 550.3701782226562, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 1.8769679300291546, "grad_norm": 0.15065741539001465, "learning_rate": 1e-06, "loss": 0.0035, "num_tokens": 117242820.0, "reward": 0.590401828289032, "reward_std": 0.22236761450767517, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 201 }, { "clip_ratio/high_max": 0.001663742968958104, "clip_ratio/high_mean": 0.0007463917900167871, "clip_ratio/low_mean": 0.0005143352955201408, "clip_ratio/low_min": 1.0354539881518576e-05, "clip_ratio/region_mean": 0.0012607270437001716, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3838.0, "completions/mean_length": 722.099365234375, "completions/mean_terminated_length": 605.2205200195312, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 1.8862973760932946, "grad_norm": 0.1430848240852356, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 117847021.0, "reward": 0.5837053656578064, "reward_std": 0.19399844110012054, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 202 }, { "clip_ratio/high_max": 0.002206108252721606, "clip_ratio/high_mean": 0.0007869080991440569, "clip_ratio/low_mean": 0.00048120848441612907, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012681166044785641, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 683.8627319335938, "completions/mean_terminated_length": 577.8469848632812, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 1.8956268221574344, "grad_norm": 0.14681637287139893, "learning_rate": 1e-06, "loss": -0.0017, "num_tokens": 118440962.0, "reward": 0.543526828289032, "reward_std": 0.18483206629753113, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 203 }, { "clip_ratio/high_max": 0.002019617433688836, "clip_ratio/high_mean": 0.0007471297667507315, "clip_ratio/low_mean": 0.0005144346114320797, "clip_ratio/low_min": 1.2245297511981335e-05, "clip_ratio/region_mean": 0.0012615643536264542, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3194.0, "completions/mean_length": 711.247802734375, "completions/mean_terminated_length": 589.9445190429688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 1.9049562682215744, "grad_norm": 0.15287570655345917, "learning_rate": 1e-06, "loss": -0.0109, "num_tokens": 119048216.0, "reward": 0.5758928656578064, "reward_std": 0.19024580717086792, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 204 }, { "clip_ratio/high_max": 0.0020018596915178932, "clip_ratio/high_mean": 0.000648028108116705, "clip_ratio/low_mean": 0.0004865585397055838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001134586625994416, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1974.0, "completions/mean_length": 692.755615234375, "completions/mean_terminated_length": 574.8602905273438, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 1.9142857142857141, "grad_norm": 0.13454777002334595, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 119631861.0, "reward": 0.5412946939468384, "reward_std": 0.16961295902729034, "rewards/verify_math_reward/mean": 0.5412946343421936, "rewards/verify_math_reward/std": 0.49857014417648315, "step": 205 }, { "clip_ratio/high_max": 0.0017946812877198681, "clip_ratio/high_mean": 0.0006605059361390886, "clip_ratio/low_mean": 0.0004883128474375553, "clip_ratio/low_min": 1.0580666639725678e-05, "clip_ratio/region_mean": 0.0011488187810755335, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2389.0, "completions/mean_length": 734.0592041015625, "completions/mean_terminated_length": 633.5873413085938, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 1.9236151603498541, "grad_norm": 0.12182007730007172, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 120262706.0, "reward": 0.5725446939468384, "reward_std": 0.17675015330314636, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 206 }, { "clip_ratio/high_max": 0.0017613017371331807, "clip_ratio/high_mean": 0.000660313854496053, "clip_ratio/low_mean": 0.00043818357789859874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010984974178427365, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 678.669677734375, "completions/mean_terminated_length": 588.6369018554688, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 1.9329446064139941, "grad_norm": 0.14148029685020447, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 120870034.0, "reward": 0.6149553656578064, "reward_std": 0.15826597809791565, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 207 }, { "clip_ratio/high_max": 0.0016052682476583868, "clip_ratio/high_mean": 0.0007449024196830578, "clip_ratio/low_mean": 0.0006256066963032936, "clip_ratio/low_min": 6.049065268598497e-05, "clip_ratio/region_mean": 0.0013705091005249415, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3599.0, "completions/mean_length": 695.5413208007812, "completions/mean_terminated_length": 633.7147827148438, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 1.9422740524781341, "grad_norm": 0.13558965921401978, "learning_rate": 1e-06, "loss": 0.0108, "num_tokens": 121518695.0, "reward": 0.5558035969734192, "reward_std": 0.2175946682691574, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715369939804077, "step": 208 }, { "clip_ratio/high_max": 0.001782178060238948, "clip_ratio/high_mean": 0.0006993906808929751, "clip_ratio/low_mean": 0.0005009642336517572, "clip_ratio/low_min": 3.2224801543634385e-05, "clip_ratio/region_mean": 0.0012003549163637217, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1833.0, "completions/mean_length": 579.3560791015625, "completions/mean_terminated_length": 515.4170532226562, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 1.951603498542274, "grad_norm": 0.14529386162757874, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 122057486.0, "reward": 0.6863839626312256, "reward_std": 0.1657840609550476, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 209 }, { "clip_ratio/high_max": 0.0017631303999223746, "clip_ratio/high_mean": 0.0007128642464522272, "clip_ratio/low_mean": 0.0004985708683307166, "clip_ratio/low_min": 1.0222440323559567e-05, "clip_ratio/region_mean": 0.0012114351120544598, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3927.0, "completions/mean_length": 645.0803833007812, "completions/mean_terminated_length": 546.0298461914062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 1.960932944606414, "grad_norm": 0.15338926017284393, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 122615702.0, "reward": 0.6506696939468384, "reward_std": 0.17303253710269928, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 210 }, { "clip_ratio/high_max": 0.0017781652713892981, "clip_ratio/high_mean": 0.0007072143907862483, "clip_ratio/low_mean": 0.00048664995665603783, "clip_ratio/low_min": 2.6575239644444082e-05, "clip_ratio/region_mean": 0.0011938643765461165, "completions/clipped_ratio": 0.013392857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1985.0, "completions/mean_length": 607.3850708007812, "completions/mean_terminated_length": 560.0283203125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 1.970262390670554, "grad_norm": 0.1385481059551239, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 123200687.0, "reward": 0.6227678656578064, "reward_std": 0.1695060133934021, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 211 }, { "clip_ratio/high_max": 0.002027250469836872, "clip_ratio/high_mean": 0.0007511444637202658, "clip_ratio/low_mean": 0.0005570881203311728, "clip_ratio/low_min": 1.2005378266621847e-05, "clip_ratio/region_mean": 0.001308232585870428, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2055.0, "completions/mean_length": 612.4085083007812, "completions/mean_terminated_length": 524.7208251953125, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 1.9795918367346939, "grad_norm": 0.14795175194740295, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 123743829.0, "reward": 0.6283482313156128, "reward_std": 0.17059119045734406, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 212 }, { "clip_ratio/high_max": 0.001795672404114157, "clip_ratio/high_mean": 0.0006457516119553475, "clip_ratio/low_mean": 0.0005550283021875657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012007799123239238, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2018.0, "completions/mean_length": 581.1897583007812, "completions/mean_terminated_length": 525.3991088867188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 1.9889212827988338, "grad_norm": 0.15848539769649506, "learning_rate": 1e-06, "loss": 0.0096, "num_tokens": 124299983.0, "reward": 0.6071428656578064, "reward_std": 0.16484861075878143, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 213 }, { "clip_ratio/high_max": 0.0015777293083374389, "clip_ratio/high_mean": 0.000572459652175894, "clip_ratio/low_mean": 0.0003918970678569167, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009643567136663478, "completions/clipped_ratio": 0.02840909090909094, "completions/max_length": 4096.0, "completions/max_terminated_length": 3519.0, "completions/mean_length": 690.633544921875, "completions/mean_terminated_length": 591.0614013671875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 1.9982507288629736, "grad_norm": 0.1399824172258377, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 124888491.0, "reward": 0.613839328289032, "reward_std": 0.16029614210128784, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 214 }, { "clip_ratio/high_max": 0.0019608186412369832, "clip_ratio/high_mean": 0.0006975977848924231, "clip_ratio/low_mean": 0.0006009238823025953, "clip_ratio/low_min": 1.3174536434235051e-05, "clip_ratio/region_mean": 0.0012985216344532091, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3672.0, "completions/mean_length": 636.1317138671875, "completions/mean_terminated_length": 561.1744384765625, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 2.00932944606414, "grad_norm": 0.14438770711421967, "learning_rate": 1e-06, "loss": -0.0137, "num_tokens": 125469129.0, "reward": 0.6339285969734192, "reward_std": 0.17990799248218536, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 215 }, { "clip_ratio/high_max": 0.001747039375914028, "clip_ratio/high_mean": 0.0007047757480904693, "clip_ratio/low_mean": 0.00044305246865405934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001147828215835034, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3625.0, "completions/mean_length": 692.1038208007812, "completions/mean_terminated_length": 582.3007202148438, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 2.01865889212828, "grad_norm": 0.13369223475456238, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 126064246.0, "reward": 0.5970982313156128, "reward_std": 0.15488240122795105, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 216 }, { "clip_ratio/high_max": 0.0019376089476281777, "clip_ratio/high_mean": 0.0007361176521953894, "clip_ratio/low_mean": 0.0005074308755865786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012435485332389362, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 646.5881958007812, "completions/mean_terminated_length": 543.5023193359375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 2.02798833819242, "grad_norm": 0.15771497786045074, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 126630789.0, "reward": 0.5524553656578064, "reward_std": 0.16668446362018585, "rewards/verify_math_reward/mean": 0.5524553656578064, "rewards/verify_math_reward/std": 0.49751853942871094, "step": 217 }, { "clip_ratio/high_max": 0.0020025025005452335, "clip_ratio/high_mean": 0.0007051961019897135, "clip_ratio/low_mean": 0.0005879925811314024, "clip_ratio/low_min": 2.1136287614353932e-05, "clip_ratio/region_mean": 0.0012931887067679781, "completions/clipped_ratio": 0.0189732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 693.0111694335938, "completions/mean_terminated_length": 627.19677734375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.03731778425656, "grad_norm": 0.1345938891172409, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 127273407.0, "reward": 0.5714285969734192, "reward_std": 0.19032247364521027, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 218 }, { "clip_ratio/high_max": 0.002085612293740269, "clip_ratio/high_mean": 0.0007509668284910731, "clip_ratio/low_mean": 0.0005286602381602279, "clip_ratio/low_min": 2.0826391846640036e-05, "clip_ratio/region_mean": 0.0012796270530088805, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 596.810302734375, "completions/mean_terminated_length": 512.8297119140625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.0466472303206995, "grad_norm": 0.16107362508773804, "learning_rate": 1e-06, "loss": 0.0131, "num_tokens": 127809477.0, "reward": 0.6852678656578064, "reward_std": 0.16612105071544647, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 219 }, { "clip_ratio/high_max": 0.0017594212222320493, "clip_ratio/high_mean": 0.0007006275009189267, "clip_ratio/low_mean": 0.0006378696471074363, "clip_ratio/low_min": 9.72308680502465e-06, "clip_ratio/region_mean": 0.0013384971534833312, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2498.0, "completions/mean_length": 711.802490234375, "completions/mean_terminated_length": 598.6055297851562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.0559766763848395, "grad_norm": 0.15395912528038025, "learning_rate": 1e-06, "loss": -0.021, "num_tokens": 128423004.0, "reward": 0.5647321939468384, "reward_std": 0.18844525516033173, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 220 }, { "clip_ratio/high_max": 0.0016991322499961825, "clip_ratio/high_mean": 0.0006614746425839257, "clip_ratio/low_mean": 0.00036323781841929303, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010247124973830068, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1999.0, "completions/mean_length": 650.1261596679688, "completions/mean_terminated_length": 559.3413696289062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.0653061224489795, "grad_norm": 0.1293102353811264, "learning_rate": 1e-06, "loss": 0.006, "num_tokens": 128999221.0, "reward": 0.6104910969734192, "reward_std": 0.1566508710384369, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 221 }, { "clip_ratio/high_max": 0.001932140989083564, "clip_ratio/high_mean": 0.0006858597153041046, "clip_ratio/low_mean": 0.0005068721893621841, "clip_ratio/low_min": 1.9853874619002454e-05, "clip_ratio/region_mean": 0.0011927319246751722, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3364.0, "completions/mean_length": 701.7120971679688, "completions/mean_terminated_length": 596.2508544921875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 2.0746355685131195, "grad_norm": 0.14621222019195557, "learning_rate": 1e-06, "loss": -0.0102, "num_tokens": 129609011.0, "reward": 0.6071428656578064, "reward_std": 0.1595044881105423, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 222 }, { "clip_ratio/high_max": 0.0016444951834273525, "clip_ratio/high_mean": 0.0005902672855881974, "clip_ratio/low_mean": 0.0004461511125555262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010364184017817024, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3674.0, "completions/mean_length": 612.3660888671875, "completions/mean_terminated_length": 532.8310546875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.0839650145772595, "grad_norm": 0.12587220966815948, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 130176891.0, "reward": 0.6852678656578064, "reward_std": 0.13034509122371674, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 223 }, { "clip_ratio/high_max": 0.001594408957316773, "clip_ratio/high_mean": 0.0005833374389112578, "clip_ratio/low_mean": 0.000571088754441007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011544262270035688, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3728.0, "completions/mean_length": 706.3717041015625, "completions/mean_terminated_length": 576.7566528320312, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 2.0932944606413995, "grad_norm": 0.13816197216510773, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 130760336.0, "reward": 0.625, "reward_std": 0.16093555092811584, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 224 }, { "clip_ratio/high_max": 0.0017887444228108507, "clip_ratio/high_mean": 0.0007015915643933113, "clip_ratio/low_mean": 0.0005311268314471818, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012327184194873553, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3030.0, "completions/mean_length": 711.6328735351562, "completions/mean_terminated_length": 602.4596557617188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.1026239067055394, "grad_norm": 0.13910657167434692, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 131381679.0, "reward": 0.5323660969734192, "reward_std": 0.17585085332393646, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 225 }, { "clip_ratio/high_max": 0.001888532075099647, "clip_ratio/high_mean": 0.0008734535094845342, "clip_ratio/low_mean": 0.0006097136374592083, "clip_ratio/low_min": 2.069764923362527e-05, "clip_ratio/region_mean": 0.0014831671505817212, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3364.0, "completions/mean_length": 716.9085083007812, "completions/mean_terminated_length": 611.91943359375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.1119533527696794, "grad_norm": 0.14524471759796143, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 132005285.0, "reward": 0.6383928656578064, "reward_std": 0.20140846073627472, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 226 }, { "clip_ratio/high_max": 0.0017761674280336592, "clip_ratio/high_mean": 0.0006098909088905202, "clip_ratio/low_mean": 0.0006259718484216137, "clip_ratio/low_min": 1.4344732335302979e-05, "clip_ratio/region_mean": 0.001235862753674155, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3936.0, "completions/mean_length": 646.9252319335938, "completions/mean_terminated_length": 564.1473999023438, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.1212827988338194, "grad_norm": 0.1339639276266098, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 132581666.0, "reward": 0.6462053656578064, "reward_std": 0.16766269505023956, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 227 }, { "clip_ratio/high_max": 0.001584508914675098, "clip_ratio/high_mean": 0.0007411727146973135, "clip_ratio/low_mean": 0.0006377400495694019, "clip_ratio/low_min": 1.3861166735296138e-05, "clip_ratio/region_mean": 0.0013789127515337896, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3111.0, "completions/mean_length": 714.2589721679688, "completions/mean_terminated_length": 584.9454956054688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.130612244897959, "grad_norm": 0.16043105721473694, "learning_rate": 1e-06, "loss": 0.0152, "num_tokens": 133174882.0, "reward": 0.598214328289032, "reward_std": 0.20316554605960846, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 228 }, { "clip_ratio/high_max": 0.00202133289349149, "clip_ratio/high_mean": 0.0007314009126275778, "clip_ratio/low_mean": 0.000528357786606648, "clip_ratio/low_min": 2.1324311092030257e-05, "clip_ratio/region_mean": 0.0012597587556228973, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3621.0, "completions/mean_length": 733.8069458007812, "completions/mean_terminated_length": 629.3429565429688, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 2.139941690962099, "grad_norm": 0.1381755769252777, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 133816717.0, "reward": 0.5714285969734192, "reward_std": 0.18257686495780945, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 229 }, { "clip_ratio/high_max": 0.0017229677687282674, "clip_ratio/high_mean": 0.000659604680549819, "clip_ratio/low_mean": 0.0007401865786960116, "clip_ratio/low_min": 8.758067815506365e-05, "clip_ratio/region_mean": 0.0013997912683407776, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 637.4967041015625, "completions/mean_terminated_length": 542.3084716796875, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 2.149271137026239, "grad_norm": 0.1531965136528015, "learning_rate": 1e-06, "loss": -0.0032, "num_tokens": 134375202.0, "reward": 0.613839328289032, "reward_std": 0.2004224956035614, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 230 }, { "clip_ratio/high_max": 0.001587845054018544, "clip_ratio/high_mean": 0.0006205899171618512, "clip_ratio/low_mean": 0.00047607856777176494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010966684676532168, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 658.1551513671875, "completions/mean_terminated_length": 547.2568969726562, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.158600583090379, "grad_norm": 0.2171899527311325, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 134939957.0, "reward": 0.590401828289032, "reward_std": 0.1589820384979248, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 231 }, { "clip_ratio/high_max": 0.0015843191467865836, "clip_ratio/high_mean": 0.000552942197828088, "clip_ratio/low_mean": 0.0005816124626107921, "clip_ratio/low_min": 7.835025826352648e-06, "clip_ratio/region_mean": 0.0011345546663505957, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3939.0, "completions/mean_length": 733.8939819335938, "completions/mean_terminated_length": 601.2818603515625, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 2.167930029154519, "grad_norm": 0.14793087542057037, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 135548638.0, "reward": 0.5569196939468384, "reward_std": 0.16735707223415375, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 232 }, { "clip_ratio/high_max": 0.0018679627937672194, "clip_ratio/high_mean": 0.0006411258782463847, "clip_ratio/low_mean": 0.0006753035140718566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00131642939595622, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3750.0, "completions/mean_length": 691.0435791015625, "completions/mean_terminated_length": 609.3245849609375, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "epoch": 2.177259475218659, "grad_norm": 0.14007490873336792, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 136173061.0, "reward": 0.5602678656578064, "reward_std": 0.1868622601032257, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 233 }, { "clip_ratio/high_max": 0.0016494242208864307, "clip_ratio/high_mean": 0.0006789761091567925, "clip_ratio/low_mean": 0.0006994724626565585, "clip_ratio/low_min": 3.795822976826457e-05, "clip_ratio/region_mean": 0.0013784485818177927, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2673.0, "completions/mean_length": 596.630615234375, "completions/mean_terminated_length": 537.0499877929688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.186588921282799, "grad_norm": 0.15200549364089966, "learning_rate": 1e-06, "loss": 0.0045, "num_tokens": 136753466.0, "reward": 0.5502232313156128, "reward_std": 0.19666621088981628, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 234 }, { "clip_ratio/high_max": 0.002031806063314434, "clip_ratio/high_mean": 0.0008012997132027522, "clip_ratio/low_mean": 0.0005328882953108405, "clip_ratio/low_min": 1.6391293684137054e-05, "clip_ratio/region_mean": 0.0013341879821382463, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1817.0, "completions/mean_length": 625.3069458007812, "completions/mean_terminated_length": 525.6888427734375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 2.195918367346939, "grad_norm": 0.16664724051952362, "learning_rate": 1e-06, "loss": 0.0064, "num_tokens": 137298637.0, "reward": 0.621651828289032, "reward_std": 0.195388525724411, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 235 }, { "clip_ratio/high_max": 0.001413239078829065, "clip_ratio/high_mean": 0.0005412902673924691, "clip_ratio/low_mean": 0.00039417503830918577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009354653302580118, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 635.8214721679688, "completions/mean_terminated_length": 540.587158203125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.205247813411079, "grad_norm": 0.1374477744102478, "learning_rate": 1e-06, "loss": -0.0058, "num_tokens": 137859461.0, "reward": 0.6417410969734192, "reward_std": 0.1322651207447052, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975653409957886, "step": 236 }, { "clip_ratio/high_max": 0.0020341953641036525, "clip_ratio/high_mean": 0.000699316549798823, "clip_ratio/low_mean": 0.000633741795354581, "clip_ratio/low_min": 3.271380228397902e-05, "clip_ratio/region_mean": 0.0013330583642527927, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2705.0, "completions/mean_length": 654.0625, "completions/mean_terminated_length": 555.269775390625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.2145772594752184, "grad_norm": 0.15545177459716797, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 138426509.0, "reward": 0.5959821939468384, "reward_std": 0.19335976243019104, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 237 }, { "clip_ratio/high_max": 0.0016926105454331264, "clip_ratio/high_mean": 0.0006320074353425298, "clip_ratio/low_mean": 0.0005650783195960685, "clip_ratio/low_min": 1.4156285033095628e-05, "clip_ratio/region_mean": 0.0011970857522101142, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3088.0, "completions/mean_length": 656.4308471679688, "completions/mean_terminated_length": 557.7060546875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.2239067055393584, "grad_norm": 0.14693720638751984, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 138994007.0, "reward": 0.5736607313156128, "reward_std": 0.1749846339225769, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 238 }, { "clip_ratio/high_max": 0.0017540780572744552, "clip_ratio/high_mean": 0.0006414892068278277, "clip_ratio/low_mean": 0.0005285826046019793, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011700718314386904, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3847.0, "completions/mean_length": 619.7767944335938, "completions/mean_terminated_length": 528.1924438476562, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.2332361516034984, "grad_norm": 0.1568622887134552, "learning_rate": 1e-06, "loss": 0.0138, "num_tokens": 139546311.0, "reward": 0.6071428656578064, "reward_std": 0.1755894124507904, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 239 }, { "clip_ratio/high_max": 0.0016234175564022735, "clip_ratio/high_mean": 0.0005522355531866197, "clip_ratio/low_mean": 0.0004864165694016265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001038652117131278, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2066.0, "completions/mean_length": 685.591552734375, "completions/mean_terminated_length": 538.69384765625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 2.2425655976676384, "grad_norm": 0.14180706441402435, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 140084193.0, "reward": 0.676339328289032, "reward_std": 0.15041227638721466, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 240 }, { "clip_ratio/high_max": 0.0020767255045939237, "clip_ratio/high_mean": 0.0006557366978086066, "clip_ratio/low_mean": 0.0005495269324455876, "clip_ratio/low_min": 8.420910489803646e-06, "clip_ratio/region_mean": 0.0012052636375301518, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2247.0, "completions/mean_length": 629.234375, "completions/mean_terminated_length": 554.127685546875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.2518950437317784, "grad_norm": 0.14197291433811188, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 140658643.0, "reward": 0.6049107313156128, "reward_std": 0.16075009107589722, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 241 }, { "clip_ratio/high_max": 0.002139833912224276, "clip_ratio/high_mean": 0.0006846389969723532, "clip_ratio/low_mean": 0.000460942414974852, "clip_ratio/low_min": 1.6104097085190006e-05, "clip_ratio/region_mean": 0.0011455814164946787, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3501.0, "completions/mean_length": 638.7433471679688, "completions/mean_terminated_length": 563.8426513671875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 2.2612244897959184, "grad_norm": 0.14257590472698212, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 141233101.0, "reward": 0.6718750596046448, "reward_std": 0.1579635888338089, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 242 }, { "clip_ratio/high_max": 0.001527231670479523, "clip_ratio/high_mean": 0.0005186072030483047, "clip_ratio/low_mean": 0.00044814040757046314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000966747640632093, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2871.0, "completions/mean_length": 660.1730346679688, "completions/mean_terminated_length": 549.33984375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 2.2705539358600584, "grad_norm": 0.12827670574188232, "learning_rate": 1e-06, "loss": -0.0195, "num_tokens": 141795584.0, "reward": 0.5658482313156128, "reward_std": 0.1409432291984558, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 243 }, { "clip_ratio/high_max": 0.0016318869857059326, "clip_ratio/high_mean": 0.0005328227020982013, "clip_ratio/low_mean": 0.0005002548914490035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010330776203772984, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 575.9029541015625, "completions/mean_terminated_length": 511.901123046875, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 2.2798833819241984, "grad_norm": 0.1491820514202118, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 142323121.0, "reward": 0.6383928656578064, "reward_std": 0.12854453921318054, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 244 }, { "clip_ratio/high_max": 0.001400911838572938, "clip_ratio/high_mean": 0.0005099018467262795, "clip_ratio/low_mean": 0.0005332937489583855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010431956179672852, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 699.450927734375, "completions/mean_terminated_length": 569.5712280273438, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.2892128279883384, "grad_norm": 0.14287753403186798, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 142897917.0, "reward": 0.5970982313156128, "reward_std": 0.1634860783815384, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 245 }, { "clip_ratio/high_max": 0.0018064162686641794, "clip_ratio/high_mean": 0.0006929526716703549, "clip_ratio/low_mean": 0.000614358965322026, "clip_ratio/low_min": 1.6297262845910154e-05, "clip_ratio/region_mean": 0.0013073116570012644, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2610.0, "completions/mean_length": 651.6607666015625, "completions/mean_terminated_length": 564.9610595703125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.298542274052478, "grad_norm": 0.16102290153503418, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 143484685.0, "reward": 0.5703125, "reward_std": 0.1980997622013092, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 246 }, { "clip_ratio/high_max": 0.0016381983587052673, "clip_ratio/high_mean": 0.0006063256041670684, "clip_ratio/low_mean": 0.00041601886732678395, "clip_ratio/low_min": 1.1895698662556242e-05, "clip_ratio/region_mean": 0.001022344469674863, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2455.0, "completions/mean_length": 636.1361694335938, "completions/mean_terminated_length": 561.1790161132812, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 2.307871720116618, "grad_norm": 0.1496383249759674, "learning_rate": 1e-06, "loss": 0.0083, "num_tokens": 144072527.0, "reward": 0.590401828289032, "reward_std": 0.1496659368276596, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 247 }, { "clip_ratio/high_max": 0.0021030395073466934, "clip_ratio/high_mean": 0.0008465516311844112, "clip_ratio/low_mean": 0.0005066342091595288, "clip_ratio/low_min": 1.3504753951565363e-05, "clip_ratio/region_mean": 0.0013531858385249507, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3977.0, "completions/mean_length": 647.4330444335938, "completions/mean_terminated_length": 536.18896484375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.317201166180758, "grad_norm": 0.1665707528591156, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 144628939.0, "reward": 0.5915178656578064, "reward_std": 0.21658296883106232, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 248 }, { "clip_ratio/high_max": 0.0015528605035797227, "clip_ratio/high_mean": 0.0006142230213299626, "clip_ratio/low_mean": 0.00034504018594816444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009592631977284327, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3226.0, "completions/mean_length": 700.0189819335938, "completions/mean_terminated_length": 582.3753051757812, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.326530612244898, "grad_norm": 0.12995047867298126, "learning_rate": 1e-06, "loss": 0.0058, "num_tokens": 145223932.0, "reward": 0.5915178656578064, "reward_std": 0.14515261352062225, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 249 }, { "clip_ratio/high_max": 0.0015787725133122876, "clip_ratio/high_mean": 0.000603437587415101, "clip_ratio/low_mean": 0.00041420321167606744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010176408359257039, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 654.3638916015625, "completions/mean_terminated_length": 543.3433227539062, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 2.335860058309038, "grad_norm": 0.14374585449695587, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 145778618.0, "reward": 0.6495535969734192, "reward_std": 0.16070660948753357, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 250 }, { "clip_ratio/high_max": 0.0019336156474309973, "clip_ratio/high_mean": 0.0006991442587604979, "clip_ratio/low_mean": 0.0005083418527647154, "clip_ratio/low_min": 1.1222840839764103e-05, "clip_ratio/region_mean": 0.0012074861042492557, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2679.0, "completions/mean_length": 659.286865234375, "completions/mean_terminated_length": 527.871337890625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.345189504373178, "grad_norm": 0.15194326639175415, "learning_rate": 1e-06, "loss": -0.0078, "num_tokens": 146326011.0, "reward": 0.5401785969734192, "reward_std": 0.16642414033412933, "rewards/verify_math_reward/mean": 0.5401785969734192, "rewards/verify_math_reward/std": 0.49866142868995667, "step": 251 }, { "clip_ratio/high_max": 0.001801699421775993, "clip_ratio/high_mean": 0.0007357687318290118, "clip_ratio/low_mean": 0.0006126707685325528, "clip_ratio/low_min": 3.877171184285544e-05, "clip_ratio/region_mean": 0.001348439502180554, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 658.0870971679688, "completions/mean_terminated_length": 522.4849243164062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.354518950437318, "grad_norm": 0.1665963977575302, "learning_rate": 1e-06, "loss": 0.0089, "num_tokens": 146861721.0, "reward": 0.590401828289032, "reward_std": 0.1912222057580948, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 252 }, { "clip_ratio/high_max": 0.0017145752426586114, "clip_ratio/high_mean": 0.0006349218183459016, "clip_ratio/low_mean": 0.00037618277292494895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010111046212841757, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2928.0, "completions/mean_length": 653.325927734375, "completions/mean_terminated_length": 550.44140625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.363848396501458, "grad_norm": 0.12984620034694672, "learning_rate": 1e-06, "loss": -0.0086, "num_tokens": 147425741.0, "reward": 0.6595982313156128, "reward_std": 0.13940228521823883, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 253 }, { "clip_ratio/high_max": 0.0015443033153133001, "clip_ratio/high_mean": 0.000532831417331181, "clip_ratio/low_mean": 0.0006252439561649226, "clip_ratio/low_min": 6.449988632084569e-05, "clip_ratio/region_mean": 0.0011580753744055983, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3603.0, "completions/mean_length": 720.2455444335938, "completions/mean_terminated_length": 603.3025512695312, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 2.373177842565598, "grad_norm": 0.14159341156482697, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 148030657.0, "reward": 0.5636160969734192, "reward_std": 0.16724716126918793, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 254 }, { "clip_ratio/high_max": 0.0019088175176875666, "clip_ratio/high_mean": 0.0007623265610163799, "clip_ratio/low_mean": 0.00039129305423557526, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011536196179804392, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2227.0, "completions/mean_length": 616.1551513671875, "completions/mean_terminated_length": 532.6388549804688, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 2.3825072886297374, "grad_norm": 0.14764274656772614, "learning_rate": 1e-06, "loss": -0.016, "num_tokens": 148581748.0, "reward": 0.6651785969734192, "reward_std": 0.17810744047164917, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 255 }, { "clip_ratio/high_max": 0.0015490252699237317, "clip_ratio/high_mean": 0.0005637399626721162, "clip_ratio/low_mean": 0.0003616811254687491, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00092542108905036, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3699.0, "completions/mean_length": 648.4553833007812, "completions/mean_terminated_length": 593.732421875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 2.3918367346938774, "grad_norm": 0.13943234086036682, "learning_rate": 1e-06, "loss": 0.0078, "num_tokens": 149199116.0, "reward": 0.578125, "reward_std": 0.14083515107631683, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 256 }, { "clip_ratio/high_max": 0.00218620785017265, "clip_ratio/high_mean": 0.0009041907687787898, "clip_ratio/low_mean": 0.0005129284945724066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014171192960930057, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 665.1942138671875, "completions/mean_terminated_length": 582.8548583984375, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 2.4011661807580174, "grad_norm": 0.14255432784557343, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 149797626.0, "reward": 0.6082589626312256, "reward_std": 0.1925688236951828, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 257 }, { "clip_ratio/high_max": 0.001797141281713266, "clip_ratio/high_mean": 0.0006807713007219718, "clip_ratio/low_mean": 0.0005956136137683643, "clip_ratio/low_min": 1.3926024621468969e-05, "clip_ratio/region_mean": 0.0012763849335897248, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2351.0, "completions/mean_length": 608.4285888671875, "completions/mean_terminated_length": 536.9293823242188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 2.4104956268221573, "grad_norm": 0.1662139594554901, "learning_rate": 1e-06, "loss": 0.0027, "num_tokens": 150353954.0, "reward": 0.645089328289032, "reward_std": 0.19801212847232819, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 258 }, { "clip_ratio/high_max": 0.002128478605300188, "clip_ratio/high_mean": 0.0008221288917411584, "clip_ratio/low_mean": 0.0004552070704448852, "clip_ratio/low_min": 1.4902241673553362e-05, "clip_ratio/region_mean": 0.0012773359740094747, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3439.0, "completions/mean_length": 602.7600708007812, "completions/mean_terminated_length": 543.2838134765625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 2.4198250728862973, "grad_norm": 0.15467597544193268, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 150924123.0, "reward": 0.660714328289032, "reward_std": 0.16371431946754456, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 259 }, { "clip_ratio/high_max": 0.001650483456614893, "clip_ratio/high_mean": 0.0007707060449320124, "clip_ratio/low_mean": 0.0006107790904934518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013814851445204113, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3470.0, "completions/mean_length": 683.4230346679688, "completions/mean_terminated_length": 561.12255859375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 2.4291545189504373, "grad_norm": 0.1504625380039215, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 151501126.0, "reward": 0.5714285969734192, "reward_std": 0.191038578748703, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 260 }, { "clip_ratio/high_max": 0.0017616295153857209, "clip_ratio/high_mean": 0.0006268397346502752, "clip_ratio/low_mean": 0.0005830239133501891, "clip_ratio/low_min": 3.093332452408504e-05, "clip_ratio/region_mean": 0.0012098636179871392, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2362.0, "completions/mean_length": 632.9017944335938, "completions/mean_terminated_length": 573.938720703125, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 2.4384839650145773, "grad_norm": 0.16305653750896454, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 152102046.0, "reward": 0.6205357313156128, "reward_std": 0.1796495020389557, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 261 }, { "clip_ratio/high_max": 0.001908959475258598, "clip_ratio/high_mean": 0.0007649866220162949, "clip_ratio/low_mean": 0.0006326360653474694, "clip_ratio/low_min": 1.3001872503082268e-05, "clip_ratio/region_mean": 0.001397622712829616, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2269.0, "completions/mean_length": 665.8582763671875, "completions/mean_terminated_length": 530.5626220703125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 2.4478134110787173, "grad_norm": 0.15925399959087372, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 152648231.0, "reward": 0.5636160969734192, "reward_std": 0.1882181465625763, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 262 }, { "clip_ratio/high_max": 0.0016611664577794727, "clip_ratio/high_mean": 0.0005999356662869104, "clip_ratio/low_mean": 0.0005950688196207921, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011950044900004286, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 648.9933471679688, "completions/mean_terminated_length": 529.5819702148438, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.4571428571428573, "grad_norm": 0.12971743941307068, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 153191657.0, "reward": 0.6071428656578064, "reward_std": 0.1479288637638092, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 263 }, { "clip_ratio/high_max": 0.0017268379779125098, "clip_ratio/high_mean": 0.000678713744491688, "clip_ratio/low_mean": 0.0006471916422015056, "clip_ratio/low_min": 1.4633575119660236e-05, "clip_ratio/region_mean": 0.001325905406702077, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2479.0, "completions/mean_length": 647.1127319335938, "completions/mean_terminated_length": 539.9551391601562, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.466472303206997, "grad_norm": 0.1600545346736908, "learning_rate": 1e-06, "loss": -0.0099, "num_tokens": 153755110.0, "reward": 0.5959821939468384, "reward_std": 0.20831559598445892, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 264 }, { "clip_ratio/high_max": 0.0013077658313704887, "clip_ratio/high_mean": 0.00045768982727167895, "clip_ratio/low_mean": 0.0002947282459899725, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007524180691689253, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 709.7835083007812, "completions/mean_terminated_length": 568.034912109375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 2.4758017492711373, "grad_norm": 0.12772515416145325, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 154332364.0, "reward": 0.5859375, "reward_std": 0.13023702800273895, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 265 }, { "clip_ratio/high_max": 0.0019189923150406685, "clip_ratio/high_mean": 0.0007898198637121823, "clip_ratio/low_mean": 0.000392467261008278, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011822871238109656, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2964.0, "completions/mean_length": 665.5223388671875, "completions/mean_terminated_length": 526.072021484375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 2.485131195335277, "grad_norm": 0.16601820290088654, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 154868656.0, "reward": 0.6707589626312256, "reward_std": 0.19001504778862, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 266 }, { "clip_ratio/high_max": 0.0015905233594821766, "clip_ratio/high_mean": 0.0005608878418570384, "clip_ratio/low_mean": 0.0005168577772565186, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010777456154755782, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3940.0, "completions/mean_length": 636.3381958007812, "completions/mean_terminated_length": 549.2528076171875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 2.494460641399417, "grad_norm": 0.13197234272956848, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 155434911.0, "reward": 0.621651828289032, "reward_std": 0.13177543878555298, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.485245943069458, "step": 267 }, { "clip_ratio/high_max": 0.0016901352646527812, "clip_ratio/high_mean": 0.0006072937467251904, "clip_ratio/low_mean": 0.00041761771080928156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010249114711768925, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 755.8850708007812, "completions/mean_terminated_length": 636.1815185546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.503790087463557, "grad_norm": 0.13727904856204987, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 156084184.0, "reward": 0.582589328289032, "reward_std": 0.1645801067352295, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.4934072494506836, "step": 268 }, { "clip_ratio/high_max": 0.002206893463153392, "clip_ratio/high_mean": 0.0008154273909894982, "clip_ratio/low_mean": 0.0005104676602059044, "clip_ratio/low_min": 6.633410976064624e-06, "clip_ratio/region_mean": 0.0013258950493764132, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3698.0, "completions/mean_length": 654.0714721679688, "completions/mean_terminated_length": 559.3394165039062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.513119533527697, "grad_norm": 0.15356619656085968, "learning_rate": 1e-06, "loss": -0.018, "num_tokens": 156656768.0, "reward": 0.6473214626312256, "reward_std": 0.1895604282617569, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 269 }, { "clip_ratio/high_max": 0.0017181057191919535, "clip_ratio/high_mean": 0.0005694239230251696, "clip_ratio/low_mean": 0.0004686009879151243, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010380249223089777, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 2846.0, "completions/mean_length": 631.4765625, "completions/mean_terminated_length": 552.3778076171875, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 2.522448979591837, "grad_norm": 0.14008112251758575, "learning_rate": 1e-06, "loss": -0.01, "num_tokens": 157234427.0, "reward": 0.6082589626312256, "reward_std": 0.1346297711133957, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 270 }, { "clip_ratio/high_max": 0.0020013369012303883, "clip_ratio/high_mean": 0.0006366890547724324, "clip_ratio/low_mean": 0.0005819433490614756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001218632394738961, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 705.5301513671875, "completions/mean_terminated_length": 616.205078125, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 2.5317784256559768, "grad_norm": 0.143727645277977, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 157883374.0, "reward": 0.5792410969734192, "reward_std": 0.16676117479801178, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 271 }, { "clip_ratio/high_max": 0.0019367850800335873, "clip_ratio/high_mean": 0.0007181636847235495, "clip_ratio/low_mean": 0.0006665767095910269, "clip_ratio/low_min": 5.396064170781756e-05, "clip_ratio/region_mean": 0.0013847403970430605, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 1988.0, "completions/mean_length": 635.3538208007812, "completions/mean_terminated_length": 548.24365234375, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 2.5411078717201168, "grad_norm": 0.16358663141727448, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 158457627.0, "reward": 0.6082589626312256, "reward_std": 0.1747995913028717, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 272 }, { "clip_ratio/high_max": 0.0021312850803951733, "clip_ratio/high_mean": 0.0008729010842216667, "clip_ratio/low_mean": 0.0006221409239515197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014950420190871228, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2360.0, "completions/mean_length": 659.3917846679688, "completions/mean_terminated_length": 564.80615234375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 2.5504373177842563, "grad_norm": 0.15482480823993683, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 159032274.0, "reward": 0.629464328289032, "reward_std": 0.20749185979366302, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 273 }, { "clip_ratio/high_max": 0.0017957189520529937, "clip_ratio/high_mean": 0.0006052036906112335, "clip_ratio/low_mean": 0.0005452909608720802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011504946523928083, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3438.0, "completions/mean_length": 800.5938110351562, "completions/mean_terminated_length": 634.4712524414062, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 2.5597667638483967, "grad_norm": 0.13584306836128235, "learning_rate": 1e-06, "loss": -0.0064, "num_tokens": 159658286.0, "reward": 0.5479910969734192, "reward_std": 0.17423324286937714, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 274 }, { "clip_ratio/high_max": 0.0019833818441838957, "clip_ratio/high_mean": 0.0008121807804855052, "clip_ratio/low_mean": 0.0007322955134441145, "clip_ratio/low_min": 4.051339146826649e-05, "clip_ratio/region_mean": 0.0015444763157574926, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2553.0, "completions/mean_length": 659.607177734375, "completions/mean_terminated_length": 573.1075439453125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 2.5690962099125363, "grad_norm": 0.16442091763019562, "learning_rate": 1e-06, "loss": 0.0062, "num_tokens": 160246262.0, "reward": 0.5770089626312256, "reward_std": 0.1950957179069519, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099617958069, "step": 275 }, { "clip_ratio/high_max": 0.0020285667706048116, "clip_ratio/high_mean": 0.0008122730541799683, "clip_ratio/low_mean": 0.0005162023753655376, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013284754131746013, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3548.0, "completions/mean_length": 677.9207763671875, "completions/mean_terminated_length": 587.8682861328125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.5784256559766763, "grad_norm": 0.16298985481262207, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 160856743.0, "reward": 0.6194196939468384, "reward_std": 0.18223848938941956, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580071330070496, "step": 276 }, { "clip_ratio/high_max": 0.0012773446906066965, "clip_ratio/high_mean": 0.00046138433572195936, "clip_ratio/low_mean": 0.0005144769911566982, "clip_ratio/low_min": 1.004338719212683e-05, "clip_ratio/region_mean": 0.0009758613596204668, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4020.0, "completions/mean_length": 618.3527221679688, "completions/mean_terminated_length": 559.1419067382812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 2.5877551020408163, "grad_norm": 0.1506633758544922, "learning_rate": 1e-06, "loss": 0.0107, "num_tokens": 161436283.0, "reward": 0.5803571939468384, "reward_std": 0.15323200821876526, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 277 }, { "clip_ratio/high_max": 0.0016585250850766897, "clip_ratio/high_mean": 0.0006474419169535395, "clip_ratio/low_mean": 0.000588792541748262, "clip_ratio/low_min": 2.9994330361660104e-05, "clip_ratio/region_mean": 0.0012362344841676531, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3539.0, "completions/mean_length": 696.2098388671875, "completions/mean_terminated_length": 578.4342041015625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.5970845481049563, "grad_norm": 0.1527535766363144, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 162029799.0, "reward": 0.5892857313156128, "reward_std": 0.18652454018592834, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 278 }, { "clip_ratio/high_max": 0.001710698736133054, "clip_ratio/high_mean": 0.0007456493276549736, "clip_ratio/low_mean": 0.0005641687066599843, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013098180315864738, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1964.0, "completions/mean_length": 624.9765625, "completions/mean_terminated_length": 525.3489990234375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 2.6064139941690962, "grad_norm": 0.15717408061027527, "learning_rate": 1e-06, "loss": -0.0128, "num_tokens": 162574402.0, "reward": 0.640625, "reward_std": 0.20094947516918182, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 279 }, { "clip_ratio/high_max": 0.0016847828483150806, "clip_ratio/high_mean": 0.0006836661832494428, "clip_ratio/low_mean": 0.0005904825584366336, "clip_ratio/low_min": 1.7561113054398447e-05, "clip_ratio/region_mean": 0.0012741487589664757, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 662.0748291015625, "completions/mean_terminated_length": 543.1166381835938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 2.6157434402332362, "grad_norm": 0.15844663977622986, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 163129125.0, "reward": 0.59375, "reward_std": 0.1752542406320572, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 280 }, { "clip_ratio/high_max": 0.0019050513583351858, "clip_ratio/high_mean": 0.0008218782295443816, "clip_ratio/low_mean": 0.0004894271578450571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013113054010318592, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3920.0, "completions/mean_length": 605.3381958007812, "completions/mean_terminated_length": 513.3734741210938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 2.6250728862973762, "grad_norm": 0.1863740086555481, "learning_rate": 1e-06, "loss": 0.0071, "num_tokens": 163658220.0, "reward": 0.6930803656578064, "reward_std": 0.1828383058309555, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147334575653076, "step": 281 }, { "clip_ratio/high_max": 0.0019721330536413006, "clip_ratio/high_mean": 0.000803927548986394, "clip_ratio/low_mean": 0.0003882240598613862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001192151612485759, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3741.0, "completions/mean_length": 642.8482666015625, "completions/mean_terminated_length": 555.9267578125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 2.6344023323615158, "grad_norm": 0.14842215180397034, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 164228764.0, "reward": 0.6886160969734192, "reward_std": 0.16296431422233582, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331799030303955, "step": 282 }, { "clip_ratio/high_max": 0.0017780308335204609, "clip_ratio/high_mean": 0.0006858137767267181, "clip_ratio/low_mean": 0.00055878469174786, "clip_ratio/low_min": 8.270477337646298e-06, "clip_ratio/region_mean": 0.0012445984611986205, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 672.7176513671875, "completions/mean_terminated_length": 545.9293823242188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 2.643731778425656, "grad_norm": 0.15470071136951447, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 164780959.0, "reward": 0.6049107313156128, "reward_std": 0.17442938685417175, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 283 }, { "clip_ratio/high_max": 0.001965646730241133, "clip_ratio/high_mean": 0.0007492106942663668, "clip_ratio/low_mean": 0.0004284022634237772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011776129540521652, "completions/clipped_ratio": 0.017857142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3432.0, "completions/mean_length": 590.1495971679688, "completions/mean_terminated_length": 526.4067993164062, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 2.6530612244897958, "grad_norm": 0.16078557074069977, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 165339453.0, "reward": 0.6808035969734192, "reward_std": 0.1886282116174698, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.46642565727233887, "step": 284 }, { "clip_ratio/high_max": 0.0014745079060958233, "clip_ratio/high_mean": 0.0005221892333793221, "clip_ratio/low_mean": 0.0005899140305700712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011121032730443403, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3898.0, "completions/mean_length": 670.4542846679688, "completions/mean_terminated_length": 576.1731567382812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.6623906705539357, "grad_norm": 0.14048659801483154, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 165931444.0, "reward": 0.5837053656578064, "reward_std": 0.15379653871059418, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 285 }, { "clip_ratio/high_max": 0.00173973134951666, "clip_ratio/high_mean": 0.0006114605039329035, "clip_ratio/low_mean": 0.00041383907682757126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010252995743940119, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 787.6707763671875, "completions/mean_terminated_length": 645.1699829101562, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.6717201166180757, "grad_norm": 0.12932142615318298, "learning_rate": 1e-06, "loss": -0.0104, "num_tokens": 166574605.0, "reward": 0.5223214626312256, "reward_std": 0.1613806188106537, "rewards/verify_math_reward/mean": 0.5223214030265808, "rewards/verify_math_reward/std": 0.49978047609329224, "step": 286 }, { "clip_ratio/high_max": 0.0017241667810594663, "clip_ratio/high_mean": 0.0006313373833108926, "clip_ratio/low_mean": 0.00058198638180329, "clip_ratio/low_min": 3.329036189825274e-05, "clip_ratio/region_mean": 0.001213323776028119, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3684.0, "completions/mean_length": 738.1261596679688, "completions/mean_terminated_length": 589.4091186523438, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.6810495626822157, "grad_norm": 0.166813924908638, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 167174934.0, "reward": 0.5368303656578064, "reward_std": 0.1808943897485733, "rewards/verify_math_reward/mean": 0.5368303656578064, "rewards/verify_math_reward/std": 0.49892017245292664, "step": 287 }, { "clip_ratio/high_max": 0.0014300073598860763, "clip_ratio/high_mean": 0.0005297455390973482, "clip_ratio/low_mean": 0.00047350670047308085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010032522441179026, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3678.0, "completions/mean_length": 697.0670166015625, "completions/mean_terminated_length": 599.5086059570312, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 2.6903790087463557, "grad_norm": 0.14425304532051086, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 167781954.0, "reward": 0.5848214626312256, "reward_std": 0.16506868600845337, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 288 }, { "clip_ratio/high_max": 0.0015350020094047068, "clip_ratio/high_mean": 0.0006349265963763173, "clip_ratio/low_mean": 0.0006083126745579648, "clip_ratio/low_min": 1.2398333637975156e-05, "clip_ratio/region_mean": 0.0012432392904884182, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 700.9699096679688, "completions/mean_terminated_length": 579.2982788085938, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 2.6997084548104957, "grad_norm": 0.16011632978916168, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 168367335.0, "reward": 0.5580357313156128, "reward_std": 0.19779597222805023, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689781665802, "step": 289 }, { "clip_ratio/high_max": 0.0021332379037630744, "clip_ratio/high_mean": 0.0007598556130687939, "clip_ratio/low_mean": 0.000539392859536747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012992484953429084, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 646.318115234375, "completions/mean_terminated_length": 555.4330444335938, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 2.7090379008746357, "grad_norm": 0.15972739458084106, "learning_rate": 1e-06, "loss": -0.004, "num_tokens": 168939676.0, "reward": 0.6517857313156128, "reward_std": 0.1827288419008255, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 290 }, { "clip_ratio/high_max": 0.0018446759895596188, "clip_ratio/high_mean": 0.0005988040938973427, "clip_ratio/low_mean": 0.0005522559067685506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011510599761095364, "completions/clipped_ratio": 0.016741071428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 3810.0, "completions/mean_length": 629.6127319335938, "completions/mean_terminated_length": 570.5936889648438, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.7183673469387752, "grad_norm": 0.1624128669500351, "learning_rate": 1e-06, "loss": -0.0033, "num_tokens": 169532665.0, "reward": 0.6160714626312256, "reward_std": 0.16532830893993378, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 291 }, { "clip_ratio/high_max": 0.0020158386978437193, "clip_ratio/high_mean": 0.000681201014231192, "clip_ratio/low_mean": 0.0006710060661134776, "clip_ratio/low_min": 1.8312335669179447e-05, "clip_ratio/region_mean": 0.001352207091258606, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3901.0, "completions/mean_length": 773.9598388671875, "completions/mean_terminated_length": 598.2937622070312, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 2.7276967930029157, "grad_norm": 0.16120792925357819, "learning_rate": 1e-06, "loss": -0.0105, "num_tokens": 170144181.0, "reward": 0.5078125, "reward_std": 0.19756634533405304, "rewards/verify_math_reward/mean": 0.5078125, "rewards/verify_math_reward/std": 0.5002182126045227, "step": 292 }, { "clip_ratio/high_max": 0.0020143109359196387, "clip_ratio/high_mean": 0.0007559222949566902, "clip_ratio/low_mean": 0.0005012179080949863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00125714020759915, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3833.0, "completions/mean_length": 780.6239013671875, "completions/mean_terminated_length": 633.7890625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.7370262390670552, "grad_norm": 0.16876362264156342, "learning_rate": 1e-06, "loss": -0.0193, "num_tokens": 170778932.0, "reward": 0.590401828289032, "reward_std": 0.18840500712394714, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 293 }, { "clip_ratio/high_max": 0.0017424338420823915, "clip_ratio/high_mean": 0.0006393464655047865, "clip_ratio/low_mean": 0.0004578434029554046, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010971898809657432, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3937.0, "completions/mean_length": 700.6261596679688, "completions/mean_terminated_length": 562.602783203125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 2.746355685131195, "grad_norm": 0.1508655995130539, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 171364165.0, "reward": 0.5703125, "reward_std": 0.15187835693359375, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 294 }, { "clip_ratio/high_max": 0.0016017416528484318, "clip_ratio/high_mean": 0.0006121256719779922, "clip_ratio/low_mean": 0.0004792724794242531, "clip_ratio/low_min": 1.2361550943751354e-05, "clip_ratio/region_mean": 0.0010913981568592135, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 727.005615234375, "completions/mean_terminated_length": 569.575927734375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.755685131195335, "grad_norm": 0.15585237741470337, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 171937642.0, "reward": 0.6395089626312256, "reward_std": 0.16476628184318542, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 295 }, { "clip_ratio/high_max": 0.0018939640758617315, "clip_ratio/high_mean": 0.0007643671979167266, "clip_ratio/low_mean": 0.0005092514757052413, "clip_ratio/low_min": 1.817917473090347e-05, "clip_ratio/region_mean": 0.001273618639970664, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 675.0078125, "completions/mean_terminated_length": 576.8162841796875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.765014577259475, "grad_norm": 0.1620856523513794, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 172528713.0, "reward": 0.629464328289032, "reward_std": 0.17078480124473572, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 296 }, { "clip_ratio/high_max": 0.0016967893825494684, "clip_ratio/high_mean": 0.0006355356690619374, "clip_ratio/low_mean": 0.0006271112733884365, "clip_ratio/low_min": 3.977091910201125e-05, "clip_ratio/region_mean": 0.0012626469506358262, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 664.739990234375, "completions/mean_terminated_length": 570.3015747070312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.774344023323615, "grad_norm": 0.2294016033411026, "learning_rate": 1e-06, "loss": 0.0055, "num_tokens": 173114304.0, "reward": 0.5446428656578064, "reward_std": 0.1891920119524002, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.4982811510562897, "step": 297 }, { "clip_ratio/high_max": 0.0016081339344964363, "clip_ratio/high_mean": 0.0005451015913422452, "clip_ratio/low_mean": 0.000597168831518502, "clip_ratio/low_min": 1.990445889532566e-05, "clip_ratio/region_mean": 0.0011422704192227684, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3131.0, "completions/mean_length": 673.5926513671875, "completions/mean_terminated_length": 575.3604736328125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.783673469387755, "grad_norm": 0.1499912440776825, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 173707715.0, "reward": 0.5814732313156128, "reward_std": 0.16427773237228394, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 298 }, { "clip_ratio/high_max": 0.0017524625054647913, "clip_ratio/high_mean": 0.0006328346871669055, "clip_ratio/low_mean": 0.0006639741823164513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012968088776688091, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3969.0, "completions/mean_length": 682.2890625, "completions/mean_terminated_length": 559.9479370117188, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 2.793002915451895, "grad_norm": 0.1889982670545578, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 174279902.0, "reward": 0.5703125, "reward_std": 0.18201345205307007, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 299 }, { "clip_ratio/high_max": 0.0019045324734179303, "clip_ratio/high_mean": 0.0007080908726493362, "clip_ratio/low_mean": 0.0005070006645837566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012150915536039975, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 763.2980346679688, "completions/mean_terminated_length": 611.634765625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 2.8023323615160347, "grad_norm": 0.1669740378856659, "learning_rate": 1e-06, "loss": 0.0093, "num_tokens": 174888033.0, "reward": 0.6149553656578064, "reward_std": 0.176642507314682, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 300 }, { "clip_ratio/high_max": 0.001616864505194826, "clip_ratio/high_mean": 0.0006531047092721565, "clip_ratio/low_mean": 0.0003469724724709522, "clip_ratio/low_min": 2.8788466806872748e-05, "clip_ratio/region_mean": 0.0010000771762861405, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2476.0, "completions/mean_length": 690.1451416015625, "completions/mean_terminated_length": 543.4435424804688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.811661807580175, "grad_norm": 0.14893873035907745, "learning_rate": 1e-06, "loss": -0.0137, "num_tokens": 175435499.0, "reward": 0.6049107313156128, "reward_std": 0.15582673251628876, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 301 }, { "clip_ratio/high_max": 0.0017115616665250855, "clip_ratio/high_mean": 0.0006654281801274919, "clip_ratio/low_mean": 0.0005392684470280074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001204696638524183, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 718.4710083007812, "completions/mean_terminated_length": 589.3186645507812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 2.8209912536443147, "grad_norm": 0.17098160088062286, "learning_rate": 1e-06, "loss": -0.0095, "num_tokens": 176027769.0, "reward": 0.5680803656578064, "reward_std": 0.17430992424488068, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 302 }, { "clip_ratio/high_max": 0.0019442137636360712, "clip_ratio/high_mean": 0.0007267302898981143, "clip_ratio/low_mean": 0.0005410526910054614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012677829945459962, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3773.0, "completions/mean_length": 777.5413208007812, "completions/mean_terminated_length": 638.6290893554688, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 2.8303206997084547, "grad_norm": 0.16408035159111023, "learning_rate": 1e-06, "loss": 0.0028, "num_tokens": 176666854.0, "reward": 0.5602678656578064, "reward_std": 0.18870487809181213, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 303 }, { "clip_ratio/high_max": 0.00199945454733097, "clip_ratio/high_mean": 0.0007962360687088221, "clip_ratio/low_mean": 0.0006074344128137454, "clip_ratio/low_min": 3.534127972670831e-05, "clip_ratio/region_mean": 0.0014036704669706523, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 670.0223388671875, "completions/mean_terminated_length": 555.4279174804688, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 2.8396501457725947, "grad_norm": 5.749591827392578, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 177239434.0, "reward": 0.6328125, "reward_std": 0.18847663700580597, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 304 }, { "clip_ratio/high_max": 0.0016806647399789654, "clip_ratio/high_mean": 0.0006731945659339544, "clip_ratio/low_mean": 0.0004962061566402554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011694007080222946, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 741.9832763671875, "completions/mean_terminated_length": 577.0316162109375, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 2.8489795918367347, "grad_norm": 0.2282479852437973, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 177831483.0, "reward": 0.5859375, "reward_std": 0.17882534861564636, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 305 }, { "clip_ratio/high_max": 0.001882583059341414, "clip_ratio/high_mean": 0.0007573571419925429, "clip_ratio/low_mean": 0.0005209851324252668, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012783422935171984, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1929.0, "completions/mean_length": 687.8471069335938, "completions/mean_terminated_length": 541.0465698242188, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 2.8583090379008746, "grad_norm": 0.17052477598190308, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 178379378.0, "reward": 0.6261160969734192, "reward_std": 0.17746736109256744, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 306 }, { "clip_ratio/high_max": 0.0017728673519741278, "clip_ratio/high_mean": 0.0006252272169149364, "clip_ratio/low_mean": 0.0006276568656176096, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001252884067071136, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3863.0, "completions/mean_length": 691.1953735351562, "completions/mean_terminated_length": 561.0, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 2.8676384839650146, "grad_norm": 0.16564425826072693, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 178947081.0, "reward": 0.5636160969734192, "reward_std": 0.16330061852931976, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 307 }, { "clip_ratio/high_max": 0.001929381880472647, "clip_ratio/high_mean": 0.0007164663511503022, "clip_ratio/low_mean": 0.0004968561765963386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001213322513649473, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3813.0, "completions/mean_length": 705.3750610351562, "completions/mean_terminated_length": 555.2074584960938, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 2.8769679300291546, "grad_norm": 0.17670652270317078, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 179510257.0, "reward": 0.613839328289032, "reward_std": 0.15293031930923462, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 308 }, { "clip_ratio/high_max": 0.0018446204776410013, "clip_ratio/high_mean": 0.0006679304315184709, "clip_ratio/low_mean": 0.0005754844419243454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012434148538886802, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 3943.0, "completions/mean_length": 654.8170166015625, "completions/mean_terminated_length": 547.8987426757812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 2.8862973760932946, "grad_norm": 0.1854640245437622, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 180073517.0, "reward": 0.5558035969734192, "reward_std": 0.18201416730880737, "rewards/verify_math_reward/mean": 0.5558035969734192, "rewards/verify_math_reward/std": 0.49715372920036316, "step": 309 }, { "clip_ratio/high_max": 0.0014908744633430615, "clip_ratio/high_mean": 0.0006075170895201154, "clip_ratio/low_mean": 0.0005944760378042702, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012019931309623644, "completions/clipped_ratio": 0.025669642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3694.0, "completions/mean_length": 691.9676513671875, "completions/mean_terminated_length": 602.2852783203125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 2.8956268221574346, "grad_norm": 0.1460346132516861, "learning_rate": 1e-06, "loss": 0.0102, "num_tokens": 180684648.0, "reward": 0.5602678656578064, "reward_std": 0.17697951197624207, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317415237427, "step": 310 }, { "clip_ratio/high_max": 0.0016144792025443166, "clip_ratio/high_mean": 0.0005706704141630325, "clip_ratio/low_mean": 0.00040935545212050783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009800258740142453, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3947.0, "completions/mean_length": 660.5480346679688, "completions/mean_terminated_length": 541.5369262695312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 2.904956268221574, "grad_norm": 0.14667034149169922, "learning_rate": 1e-06, "loss": -0.0159, "num_tokens": 181252803.0, "reward": 0.6495535969734192, "reward_std": 0.14507704973220825, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 311 }, { "clip_ratio/high_max": 0.00180345787157421, "clip_ratio/high_mean": 0.0005535933951250627, "clip_ratio/low_mean": 0.000350333871665498, "clip_ratio/low_min": 1.0835645298357122e-05, "clip_ratio/region_mean": 0.0009039272681548027, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 665.2545166015625, "completions/mean_terminated_length": 566.7830200195312, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 2.914285714285714, "grad_norm": 0.14752334356307983, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 181836815.0, "reward": 0.6350446939468384, "reward_std": 0.13388299942016602, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 312 }, { "clip_ratio/high_max": 0.0016972209341474809, "clip_ratio/high_mean": 0.0006843325045338133, "clip_ratio/low_mean": 0.00048033437133199186, "clip_ratio/low_min": 1.7740561816026457e-05, "clip_ratio/region_mean": 0.0011646668936009519, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 795.1808471679688, "completions/mean_terminated_length": 604.2243041992188, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 2.923615160349854, "grad_norm": 0.14472293853759766, "learning_rate": 1e-06, "loss": -0.0274, "num_tokens": 182442249.0, "reward": 0.5546875, "reward_std": 0.169460728764534, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 313 }, { "clip_ratio/high_max": 0.002174861918319948, "clip_ratio/high_mean": 0.0008585118412156589, "clip_ratio/low_mean": 0.0007678492438571993, "clip_ratio/low_min": 6.706338263029465e-05, "clip_ratio/region_mean": 0.0016263610741589218, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3953.0, "completions/mean_length": 731.5000610351562, "completions/mean_terminated_length": 598.79345703125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 2.932944606413994, "grad_norm": 0.19401288032531738, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 183047881.0, "reward": 0.5535714626312256, "reward_std": 0.2113219052553177, "rewards/verify_math_reward/mean": 0.5535714030265808, "rewards/verify_math_reward/std": 0.4973995089530945, "step": 314 }, { "clip_ratio/high_max": 0.0018430207128403708, "clip_ratio/high_mean": 0.0007873630984249758, "clip_ratio/low_mean": 0.0005917095313634491, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001379072607960552, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3411.0, "completions/mean_length": 711.974365234375, "completions/mean_terminated_length": 557.9755249023438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 2.942274052478134, "grad_norm": 0.16679641604423523, "learning_rate": 1e-06, "loss": -0.0196, "num_tokens": 183620778.0, "reward": 0.5703125, "reward_std": 0.18329225480556488, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 315 }, { "clip_ratio/high_max": 0.0018565994505479466, "clip_ratio/high_mean": 0.000691774314873328, "clip_ratio/low_mean": 0.0005710573195756297, "clip_ratio/low_min": 4.674640695156995e-05, "clip_ratio/region_mean": 0.0012628316508198623, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 626.6138916015625, "completions/mean_terminated_length": 551.4503784179688, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 2.951603498542274, "grad_norm": 0.17913031578063965, "learning_rate": 1e-06, "loss": -0.0041, "num_tokens": 184199472.0, "reward": 0.6339285969734192, "reward_std": 0.18355371057987213, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 316 }, { "clip_ratio/high_max": 0.0019007811752089765, "clip_ratio/high_mean": 0.0007266173579409951, "clip_ratio/low_mean": 0.0006725180865032598, "clip_ratio/low_min": 3.9150353586592246e-05, "clip_ratio/region_mean": 0.0013991354608151596, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3775.0, "completions/mean_length": 740.2120971679688, "completions/mean_terminated_length": 615.9236450195312, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 2.960932944606414, "grad_norm": 0.17382897436618805, "learning_rate": 1e-06, "loss": 0.0038, "num_tokens": 184818942.0, "reward": 0.5636160969734192, "reward_std": 0.22244179248809814, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 317 }, { "clip_ratio/high_max": 0.0022227749395824503, "clip_ratio/high_mean": 0.0008015793300728546, "clip_ratio/low_mean": 0.0006286112911766395, "clip_ratio/low_min": 1.3640331417263951e-05, "clip_ratio/region_mean": 0.0014301905903266743, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 2776.0, "completions/mean_length": 617.2410888671875, "completions/mean_terminated_length": 529.675048828125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 2.970262390670554, "grad_norm": 0.16559240221977234, "learning_rate": 1e-06, "loss": -0.0096, "num_tokens": 185371030.0, "reward": 0.645089328289032, "reward_std": 0.17701905965805054, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 318 }, { "clip_ratio/high_max": 0.0020268753651180305, "clip_ratio/high_mean": 0.0007223895572678884, "clip_ratio/low_mean": 0.0005586875458902796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001281077100429684, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3976.0, "completions/mean_length": 707.771240234375, "completions/mean_terminated_length": 578.209716796875, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 2.979591836734694, "grad_norm": 0.17613056302070618, "learning_rate": 1e-06, "loss": 0.0188, "num_tokens": 185959809.0, "reward": 0.6015625, "reward_std": 0.18498292565345764, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 319 }, { "clip_ratio/high_max": 0.0018941805028589442, "clip_ratio/high_mean": 0.0007335241207329091, "clip_ratio/low_mean": 0.0004958659656040254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012293900945223868, "completions/clipped_ratio": 0.022321428571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 662.2310791015625, "completions/mean_terminated_length": 583.83447265625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 2.9889212827988336, "grad_norm": 0.15626439452171326, "learning_rate": 1e-06, "loss": 0.0036, "num_tokens": 186567952.0, "reward": 0.6584821939468384, "reward_std": 0.1782250851392746, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 320 }, { "clip_ratio/high_max": 0.0019759551250899676, "clip_ratio/high_mean": 0.0006801464633099386, "clip_ratio/low_mean": 0.00046020287391002057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011403493299440015, "completions/clipped_ratio": 0.05965909090909094, "completions/max_length": 4096.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 757.54833984375, "completions/mean_terminated_length": 545.7432250976562, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 2.9982507288629736, "grad_norm": 0.15620289742946625, "learning_rate": 1e-06, "loss": -0.0242, "num_tokens": 187144377.0, "reward": 0.6183035969734192, "reward_std": 0.14376364648342133, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 321 }, { "clip_ratio/high_max": 0.001879252122307662, "clip_ratio/high_mean": 0.0007739460415905342, "clip_ratio/low_mean": 0.0005718898901250213, "clip_ratio/low_min": 1.1040452591259964e-05, "clip_ratio/region_mean": 0.0013458359462674707, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3783.0, "completions/mean_length": 873.4129638671875, "completions/mean_terminated_length": 646.2532348632812, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 3.00932944606414, "grad_norm": 0.14187264442443848, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 187775579.0, "reward": 0.5189732313156128, "reward_std": 0.19692833721637726, "rewards/verify_math_reward/mean": 0.5189732313156128, "rewards/verify_math_reward/std": 0.49991893768310547, "step": 322 }, { "clip_ratio/high_max": 0.0018968095246236771, "clip_ratio/high_mean": 0.0007414832389258663, "clip_ratio/low_mean": 0.0005251816055533709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012666648508457001, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3351.0, "completions/mean_length": 741.9219360351562, "completions/mean_terminated_length": 593.3729858398438, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 3.01865889212828, "grad_norm": 0.170277401804924, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 188367189.0, "reward": 0.6227678656578064, "reward_std": 0.17833498120307922, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 323 }, { "clip_ratio/high_max": 0.001739174982503755, "clip_ratio/high_mean": 0.0005863178848812822, "clip_ratio/low_mean": 0.00048157983019336825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010678976977942511, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3532.0, "completions/mean_length": 733.2902221679688, "completions/mean_terminated_length": 563.77490234375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.02798833819242, "grad_norm": 0.15343502163887024, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 188937753.0, "reward": 0.5837053656578064, "reward_std": 0.16179178655147552, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 324 }, { "clip_ratio/high_max": 0.0017487070144852623, "clip_ratio/high_mean": 0.0006142963193269679, "clip_ratio/low_mean": 0.0005022132290832815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011165095456817653, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 664.40625, "completions/mean_terminated_length": 561.8528442382812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.03731778425656, "grad_norm": 0.16153734922409058, "learning_rate": 1e-06, "loss": -0.018, "num_tokens": 189517157.0, "reward": 0.6205357313156128, "reward_std": 0.15932045876979828, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 325 }, { "clip_ratio/high_max": 0.0015828797986614518, "clip_ratio/high_mean": 0.0005282797174004372, "clip_ratio/low_mean": 0.00034335495331561106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008716346837900346, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2692.0, "completions/mean_length": 670.6138916015625, "completions/mean_terminated_length": 535.5057983398438, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.0466472303206995, "grad_norm": 0.13909634947776794, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 190059027.0, "reward": 0.6361607313156128, "reward_std": 0.12869539856910706, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 326 }, { "clip_ratio/high_max": 0.001953350627445616, "clip_ratio/high_mean": 0.0006827437955507776, "clip_ratio/low_mean": 0.0005384850701375399, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012212288565933704, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 766.1797485351562, "completions/mean_terminated_length": 598.3223876953125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 3.0559766763848395, "grad_norm": 0.16334910690784454, "learning_rate": 1e-06, "loss": -0.0051, "num_tokens": 190661548.0, "reward": 0.59375, "reward_std": 0.17979811131954193, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 327 }, { "clip_ratio/high_max": 0.0017032836185535416, "clip_ratio/high_mean": 0.0006889922497066436, "clip_ratio/low_mean": 0.0005080533292129985, "clip_ratio/low_min": 1.1794678357546218e-05, "clip_ratio/region_mean": 0.001197045603475999, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 688.1049194335938, "completions/mean_terminated_length": 565.9722290039062, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 3.0653061224489795, "grad_norm": 0.19825489819049835, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 191243866.0, "reward": 0.6205357313156128, "reward_std": 0.1688220202922821, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 328 }, { "clip_ratio/high_max": 0.0013345623265195172, "clip_ratio/high_mean": 0.0004686905158450827, "clip_ratio/low_mean": 0.0005615261945877137, "clip_ratio/low_min": 2.133105772372801e-05, "clip_ratio/region_mean": 0.0010302167138434015, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2409.0, "completions/mean_length": 656.3917846679688, "completions/mean_terminated_length": 524.8656005859375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.0746355685131195, "grad_norm": 0.15823489427566528, "learning_rate": 1e-06, "loss": -0.0091, "num_tokens": 191787633.0, "reward": 0.6439732313156128, "reward_std": 0.11693863570690155, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 329 }, { "clip_ratio/high_max": 0.001635613909456879, "clip_ratio/high_mean": 0.0006411001641026814, "clip_ratio/low_mean": 0.0006498393977381056, "clip_ratio/low_min": 2.5415084564883728e-05, "clip_ratio/region_mean": 0.0012909395663882606, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 717.8895263671875, "completions/mean_terminated_length": 596.8242797851562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 3.0839650145772595, "grad_norm": 0.1525290608406067, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 192399622.0, "reward": 0.5334821939468384, "reward_std": 0.15778063237667084, "rewards/verify_math_reward/mean": 0.5334821343421936, "rewards/verify_math_reward/std": 0.49915632605552673, "step": 330 }, { "clip_ratio/high_max": 0.002810267309541814, "clip_ratio/high_mean": 0.0009091607262234902, "clip_ratio/low_mean": 0.0005943442874922766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015035050055303145, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3285.0, "completions/mean_length": 698.388427734375, "completions/mean_terminated_length": 552.0419311523438, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 3.0932944606413995, "grad_norm": 0.1841270923614502, "learning_rate": 1e-06, "loss": -0.0076, "num_tokens": 192960602.0, "reward": 0.6640625, "reward_std": 0.18806366622447968, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 331 }, { "clip_ratio/high_max": 0.0013865030960005242, "clip_ratio/high_mean": 0.000526547885783657, "clip_ratio/low_mean": 0.00039444234425900504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009209902309521567, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 673.2689819335938, "completions/mean_terminated_length": 558.7831420898438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 3.1026239067055394, "grad_norm": 0.14227311313152313, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 193532283.0, "reward": 0.6428571939468384, "reward_std": 0.13996823132038116, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 332 }, { "clip_ratio/high_max": 0.0016808614018373191, "clip_ratio/high_mean": 0.0005795997749373782, "clip_ratio/low_mean": 0.0004085123721324635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009881121695798356, "completions/clipped_ratio": 0.0200892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2650.0, "completions/mean_length": 593.1730346679688, "completions/mean_terminated_length": 521.361083984375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.1119533527696794, "grad_norm": 0.14072465896606445, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 194085366.0, "reward": 0.6584821939468384, "reward_std": 0.13587605953216553, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 333 }, { "clip_ratio/high_max": 0.0016859312891028821, "clip_ratio/high_mean": 0.000606731589869014, "clip_ratio/low_mean": 0.0006843964201834751, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012911280027765315, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 730.029052734375, "completions/mean_terminated_length": 564.4894409179688, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 3.1212827988338194, "grad_norm": 0.1528729945421219, "learning_rate": 1e-06, "loss": 0.0052, "num_tokens": 194668272.0, "reward": 0.5881696939468384, "reward_std": 0.16645807027816772, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 334 }, { "clip_ratio/high_max": 0.0016520459721505176, "clip_ratio/high_mean": 0.0006331761796900537, "clip_ratio/low_mean": 0.0005587463592746644, "clip_ratio/low_min": 1.7099862816394307e-05, "clip_ratio/region_mean": 0.001191922536236234, "completions/clipped_ratio": 0.021205357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 594.6585083007812, "completions/mean_terminated_length": 518.802734375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 3.130612244897959, "grad_norm": 0.17078544199466705, "learning_rate": 1e-06, "loss": 0.0051, "num_tokens": 195204806.0, "reward": 0.6819196939468384, "reward_std": 0.16244256496429443, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 335 }, { "clip_ratio/high_max": 0.001948820790858008, "clip_ratio/high_mean": 0.0006329362786345882, "clip_ratio/low_mean": 0.0006931475700184819, "clip_ratio/low_min": 3.0884128136676736e-05, "clip_ratio/region_mean": 0.0013260838386486284, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 683.0938110351562, "completions/mean_terminated_length": 585.1343383789062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.139941690962099, "grad_norm": 0.16676604747772217, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 195804794.0, "reward": 0.5758928656578064, "reward_std": 0.18892493844032288, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 336 }, { "clip_ratio/high_max": 0.0018137805673177354, "clip_ratio/high_mean": 0.0005947059671598254, "clip_ratio/low_mean": 0.0005975330741421203, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011922390367544722, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2237.0, "completions/mean_length": 670.575927734375, "completions/mean_terminated_length": 523.0314331054688, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 3.149271137026239, "grad_norm": 0.16413962841033936, "learning_rate": 1e-06, "loss": -0.0215, "num_tokens": 196341382.0, "reward": 0.6741071939468384, "reward_std": 0.13685427606105804, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 337 }, { "clip_ratio/high_max": 0.0020174984420009423, "clip_ratio/high_mean": 0.000718235234671738, "clip_ratio/low_mean": 0.0005036646816733992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012218999254400842, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 884.7723388671875, "completions/mean_terminated_length": 641.9063720703125, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 3.158600583090379, "grad_norm": 0.15254896879196167, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 196990674.0, "reward": 0.494419664144516, "reward_std": 0.1596214473247528, "rewards/verify_math_reward/mean": 0.4944196343421936, "rewards/verify_math_reward/std": 0.5002480745315552, "step": 338 }, { "clip_ratio/high_max": 0.0017272997283726, "clip_ratio/high_mean": 0.000542580040928442, "clip_ratio/low_mean": 0.0003449479145274381, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008875279399944702, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3526.0, "completions/mean_length": 761.5792846679688, "completions/mean_terminated_length": 597.59130859375, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.167930029154519, "grad_norm": 0.14441409707069397, "learning_rate": 1e-06, "loss": -0.0018, "num_tokens": 197593545.0, "reward": 0.6037946939468384, "reward_std": 0.14534735679626465, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 339 }, { "clip_ratio/high_max": 0.00170107834492228, "clip_ratio/high_mean": 0.0005152284193172818, "clip_ratio/low_mean": 0.000506231612234842, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010214600151812192, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3342.0, "completions/mean_length": 729.700927734375, "completions/mean_terminated_length": 572.3971557617188, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.177259475218659, "grad_norm": 0.1728869378566742, "learning_rate": 1e-06, "loss": -0.0024, "num_tokens": 198168413.0, "reward": 0.6339285969734192, "reward_std": 0.16506867110729218, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 340 }, { "clip_ratio/high_max": 0.0016669321157678496, "clip_ratio/high_mean": 0.0006166034363559447, "clip_ratio/low_mean": 0.0004913312923235935, "clip_ratio/low_min": 1.768033871485386e-05, "clip_ratio/region_mean": 0.0011079347132181283, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 697.200927734375, "completions/mean_terminated_length": 587.5621948242188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.186588921282799, "grad_norm": 0.1714445948600769, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 198761089.0, "reward": 0.6116071939468384, "reward_std": 0.15785479545593262, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 341 }, { "clip_ratio/high_max": 0.0018787250737659633, "clip_ratio/high_mean": 0.0007277342265297193, "clip_ratio/low_mean": 0.0005612047139038623, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012889389363408554, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3681.0, "completions/mean_length": 762.3873291015625, "completions/mean_terminated_length": 606.6109619140625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 3.195918367346939, "grad_norm": 0.15971846878528595, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 199373820.0, "reward": 0.582589328289032, "reward_std": 0.1716071218252182, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 342 }, { "clip_ratio/high_max": 0.0019077946344623342, "clip_ratio/high_mean": 0.0007565264622826362, "clip_ratio/low_mean": 0.0004942814248352079, "clip_ratio/low_min": 1.1316313248244114e-05, "clip_ratio/region_mean": 0.0012508078943938017, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 684.2467041015625, "completions/mean_terminated_length": 578.2427978515625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.205247813411079, "grad_norm": 0.16806165874004364, "learning_rate": 1e-06, "loss": 0.0085, "num_tokens": 199962417.0, "reward": 0.637276828289032, "reward_std": 0.15710733830928802, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 343 }, { "clip_ratio/high_max": 0.0015897958546702284, "clip_ratio/high_mean": 0.0005700374276784714, "clip_ratio/low_mean": 0.0005594242948063766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001129461725213332, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 711.3158569335938, "completions/mean_terminated_length": 590.0150146484375, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.2145772594752184, "grad_norm": 0.15745463967323303, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 200561340.0, "reward": 0.5658482313156128, "reward_std": 0.15503577888011932, "rewards/verify_math_reward/mean": 0.5658482313156128, "rewards/verify_math_reward/std": 0.49592188000679016, "step": 344 }, { "clip_ratio/high_max": 0.0017465214186813682, "clip_ratio/high_mean": 0.0007276929718500469, "clip_ratio/low_mean": 0.0005883022058696952, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013159951849956997, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 787.1819458007812, "completions/mean_terminated_length": 612.2150268554688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 3.2239067055393584, "grad_norm": 0.16699855029582977, "learning_rate": 1e-06, "loss": -0.0143, "num_tokens": 201177119.0, "reward": 0.5502232313156128, "reward_std": 0.17239172756671906, "rewards/verify_math_reward/mean": 0.5502232313156128, "rewards/verify_math_reward/std": 0.49774909019470215, "step": 345 }, { "clip_ratio/high_max": 0.00131191932814545, "clip_ratio/high_mean": 0.00046841485527693294, "clip_ratio/low_mean": 0.00036787522867598454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008362900734937284, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3025.0, "completions/mean_length": 629.9754638671875, "completions/mean_terminated_length": 546.7908325195312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.2332361516034984, "grad_norm": 0.17488166689872742, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 201739513.0, "reward": 0.6897321939468384, "reward_std": 0.11836010217666626, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 346 }, { "clip_ratio/high_max": 0.0015291980453184806, "clip_ratio/high_mean": 0.0004885729395027738, "clip_ratio/low_mean": 0.0003527211426899157, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008412940915150102, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 691.177490234375, "completions/mean_terminated_length": 569.1549072265625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 3.2425655976676384, "grad_norm": 0.1976059377193451, "learning_rate": 1e-06, "loss": -0.0016, "num_tokens": 202323064.0, "reward": 0.5926339626312256, "reward_std": 0.12576760351657867, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 347 }, { "clip_ratio/high_max": 0.002212184146628715, "clip_ratio/high_mean": 0.0008297939384647179, "clip_ratio/low_mean": 0.0005541007749343407, "clip_ratio/low_min": 1.3337601558305323e-05, "clip_ratio/region_mean": 0.001383894748869352, "completions/clipped_ratio": 0.030133928571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 645.5814819335938, "completions/mean_terminated_length": 538.3762817382812, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 3.2518950437317784, "grad_norm": 0.1889650672674179, "learning_rate": 1e-06, "loss": -0.0107, "num_tokens": 202882113.0, "reward": 0.6350446939468384, "reward_std": 0.18054921925067902, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 348 }, { "clip_ratio/high_max": 0.0015015474346000701, "clip_ratio/high_mean": 0.0005599265214186744, "clip_ratio/low_mean": 0.0006452595171140274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012051860612700693, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 843.4152221679688, "completions/mean_terminated_length": 643.0189819335938, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 3.2612244897959184, "grad_norm": 0.16263212263584137, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 203519669.0, "reward": 0.4799107313156128, "reward_std": 0.17321588099002838, "rewards/verify_math_reward/mean": 0.4799107015132904, "rewards/verify_math_reward/std": 0.4998753070831299, "step": 349 }, { "clip_ratio/high_max": 0.0015449030106537975, "clip_ratio/high_mean": 0.0005975261556159239, "clip_ratio/low_mean": 0.0005389400521380594, "clip_ratio/low_min": 3.970774923800491e-05, "clip_ratio/region_mean": 0.0011364662168489303, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4019.0, "completions/mean_length": 683.8873291015625, "completions/mean_terminated_length": 565.6847534179688, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.2705539358600584, "grad_norm": 0.14683841168880463, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 204102016.0, "reward": 0.5848214626312256, "reward_std": 0.15405938029289246, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 350 }, { "clip_ratio/high_max": 0.0017227121534233447, "clip_ratio/high_mean": 0.0005658843256242108, "clip_ratio/low_mean": 0.00048291549046552973, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010487997969903518, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1883.0, "completions/mean_length": 751.5647583007812, "completions/mean_terminated_length": 549.7112426757812, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.2798833819241984, "grad_norm": 0.2624254822731018, "learning_rate": 1e-06, "loss": -0.0104, "num_tokens": 204653178.0, "reward": 0.5680803656578064, "reward_std": 0.139596626162529, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 351 }, { "clip_ratio/high_max": 0.002103293405525619, "clip_ratio/high_mean": 0.0008459727014269447, "clip_ratio/low_mean": 0.0005525255082829972, "clip_ratio/low_min": 1.8603959688334726e-05, "clip_ratio/region_mean": 0.0013984982288093306, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3596.0, "completions/mean_length": 829.4297485351562, "completions/mean_terminated_length": 648.5948486328125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 3.2892128279883384, "grad_norm": 0.2469792664051056, "learning_rate": 1e-06, "loss": -0.0298, "num_tokens": 205298483.0, "reward": 0.5859375, "reward_std": 0.19163267314434052, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 352 }, { "clip_ratio/high_max": 0.002046771056484431, "clip_ratio/high_mean": 0.0007821938470442547, "clip_ratio/low_mean": 0.0005993165068503004, "clip_ratio/low_min": 4.450034612091258e-05, "clip_ratio/region_mean": 0.0013815103739034384, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 1960.0, "completions/mean_length": 738.4074096679688, "completions/mean_terminated_length": 573.2798461914062, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 3.298542274052478, "grad_norm": 0.18035072088241577, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 205881584.0, "reward": 0.640625, "reward_std": 0.1795370876789093, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 353 }, { "clip_ratio/high_max": 0.0022992012891336344, "clip_ratio/high_mean": 0.0008977718935057055, "clip_ratio/low_mean": 0.0005703734134385741, "clip_ratio/low_min": 3.3743564927135594e-05, "clip_ratio/region_mean": 0.0014681453103548847, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2866.0, "completions/mean_length": 713.4152221679688, "completions/mean_terminated_length": 592.1895751953125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 3.307871720116618, "grad_norm": 0.1876998245716095, "learning_rate": 1e-06, "loss": 0.0073, "num_tokens": 206484748.0, "reward": 0.5814732313156128, "reward_std": 0.19425947964191437, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 354 }, { "clip_ratio/high_max": 0.001361861559416866, "clip_ratio/high_mean": 0.00046019937053642934, "clip_ratio/low_mean": 0.00038518078054039506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008453801601717714, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1977.0, "completions/mean_length": 601.1495971679688, "completions/mean_terminated_length": 500.8381042480469, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 3.317201166180758, "grad_norm": 0.14762645959854126, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 206998346.0, "reward": 0.707589328289032, "reward_std": 0.12253419309854507, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 355 }, { "clip_ratio/high_max": 0.0020514542047749273, "clip_ratio/high_mean": 0.0008852325827319873, "clip_ratio/low_mean": 0.0005880790904484456, "clip_ratio/low_min": 1.4937858395569492e-05, "clip_ratio/region_mean": 0.0014733116477145813, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3683.0, "completions/mean_length": 721.4219360351562, "completions/mean_terminated_length": 571.9650268554688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 3.326530612244898, "grad_norm": 0.19541537761688232, "learning_rate": 1e-06, "loss": -0.009, "num_tokens": 207579852.0, "reward": 0.6729910969734192, "reward_std": 0.20410872995853424, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 356 }, { "clip_ratio/high_max": 0.0015374102658824995, "clip_ratio/high_mean": 0.0005859010707354173, "clip_ratio/low_mean": 0.0004981515567124006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010840526410902385, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3641.0, "completions/mean_length": 748.0814819335938, "completions/mean_terminated_length": 616.0289916992188, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 3.335860058309038, "grad_norm": 0.1314224898815155, "learning_rate": 1e-06, "loss": -0.0121, "num_tokens": 208207973.0, "reward": 0.5680803656578064, "reward_std": 0.14887316524982452, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 357 }, { "clip_ratio/high_max": 0.0016378237523895223, "clip_ratio/high_mean": 0.0006037754192220746, "clip_ratio/low_mean": 0.00047187791460601147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001075653333828086, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2169.0, "completions/mean_length": 656.7522583007812, "completions/mean_terminated_length": 529.3726806640625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.345189504373178, "grad_norm": 0.16750304400920868, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 208749119.0, "reward": 0.6428571939468384, "reward_std": 0.15053103864192963, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 358 }, { "clip_ratio/high_max": 0.001733257890009554, "clip_ratio/high_mean": 0.0007255245109263342, "clip_ratio/low_mean": 0.0005510935743586742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001276618058909662, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3990.0, "completions/mean_length": 762.9185791015625, "completions/mean_terminated_length": 615.299560546875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 3.354518950437318, "grad_norm": 0.1626165807247162, "learning_rate": 1e-06, "loss": -0.015, "num_tokens": 209371766.0, "reward": 0.5881696939468384, "reward_std": 0.1692011058330536, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 359 }, { "clip_ratio/high_max": 0.001696944389550481, "clip_ratio/high_mean": 0.000568242248846218, "clip_ratio/low_mean": 0.0005102288118905562, "clip_ratio/low_min": 1.1991557585133705e-05, "clip_ratio/region_mean": 0.0010784710648295004, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3971.0, "completions/mean_length": 750.646240234375, "completions/mean_terminated_length": 582.005859375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.363848396501458, "grad_norm": 0.1634666621685028, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 209959129.0, "reward": 0.6261160969734192, "reward_std": 0.15488353371620178, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 360 }, { "clip_ratio/high_max": 0.0017860847256088164, "clip_ratio/high_mean": 0.000683918433423969, "clip_ratio/low_mean": 0.000540567531970737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012244859753991477, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2937.0, "completions/mean_length": 655.0178833007812, "completions/mean_terminated_length": 531.6994018554688, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.373177842565598, "grad_norm": 0.19383962452411652, "learning_rate": 1e-06, "loss": -0.025, "num_tokens": 210519769.0, "reward": 0.6361607313156128, "reward_std": 0.15931904315948486, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 361 }, { "clip_ratio/high_max": 0.0021199771654210053, "clip_ratio/high_mean": 0.0008998619014164433, "clip_ratio/low_mean": 0.000567416234389384, "clip_ratio/low_min": 1.3975849469716195e-05, "clip_ratio/region_mean": 0.0014672781180706806, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 688.5949096679688, "completions/mean_terminated_length": 529.3703002929688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.3825072886297374, "grad_norm": 0.4614749252796173, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 211059062.0, "reward": 0.6573660969734192, "reward_std": 0.1775440275669098, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 362 }, { "clip_ratio/high_max": 0.001744619985402096, "clip_ratio/high_mean": 0.0006723825445078546, "clip_ratio/low_mean": 0.00045334602782531874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011257286096224561, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3811.0, "completions/mean_length": 699.0892944335938, "completions/mean_terminated_length": 597.5723876953125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.3918367346938774, "grad_norm": 0.16083276271820068, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 211669046.0, "reward": 0.5580357313156128, "reward_std": 0.17333240807056427, "rewards/verify_math_reward/mean": 0.5580357313156128, "rewards/verify_math_reward/std": 0.49689778685569763, "step": 363 }, { "clip_ratio/high_max": 0.0016588986036367714, "clip_ratio/high_mean": 0.0005556809283007169, "clip_ratio/low_mean": 0.000585030695219757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011407116217014845, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 692.0625610351562, "completions/mean_terminated_length": 545.4435424804688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.4011661807580174, "grad_norm": 0.1567852795124054, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 212235430.0, "reward": 0.6015625, "reward_std": 0.14402396976947784, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 364 }, { "clip_ratio/high_max": 0.001806083724659402, "clip_ratio/high_mean": 0.0006226782388694119, "clip_ratio/low_mean": 0.0004972527485733735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011199309992662165, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 699.0256958007812, "completions/mean_terminated_length": 573.2117919921875, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 3.4104956268221573, "grad_norm": 0.17135797441005707, "learning_rate": 1e-06, "loss": -0.0129, "num_tokens": 212815645.0, "reward": 0.6350446939468384, "reward_std": 0.160858154296875, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 365 }, { "clip_ratio/high_max": 0.002020967825956177, "clip_ratio/high_mean": 0.0008052228022279451, "clip_ratio/low_mean": 0.0006546931972479797, "clip_ratio/low_min": 6.945713084860472e-05, "clip_ratio/region_mean": 0.0014599159840145148, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2577.0, "completions/mean_length": 745.9564819335938, "completions/mean_terminated_length": 589.412353515625, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 3.4198250728862973, "grad_norm": 0.17605097591876984, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 213422182.0, "reward": 0.5680803656578064, "reward_std": 0.188966304063797, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 366 }, { "clip_ratio/high_max": 0.001761342540703481, "clip_ratio/high_mean": 0.0006754804999218322, "clip_ratio/low_mean": 0.0005779823604825651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012534628731373232, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 767.8092041015625, "completions/mean_terminated_length": 600.0339965820312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.4291545189504373, "grad_norm": 0.22071969509124756, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 214032219.0, "reward": 0.5703125, "reward_std": 0.17127405107021332, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 367 }, { "clip_ratio/high_max": 0.002016289272432914, "clip_ratio/high_mean": 0.0007822973329894012, "clip_ratio/low_mean": 0.0005686713811883237, "clip_ratio/low_min": 1.843114114308264e-05, "clip_ratio/region_mean": 0.0013509687087207567, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3529.0, "completions/mean_length": 641.6908569335938, "completions/mean_terminated_length": 538.4586181640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.4384839650145773, "grad_norm": 0.17655901610851288, "learning_rate": 1e-06, "loss": -0.0027, "num_tokens": 214584758.0, "reward": 0.6875000596046448, "reward_std": 0.1562718003988266, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 368 }, { "clip_ratio/high_max": 0.0018473334785085171, "clip_ratio/high_mean": 0.000736389138182858, "clip_ratio/low_mean": 0.0005798566762678092, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001316245790803805, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 747.1964721679688, "completions/mean_terminated_length": 553.4639892578125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 3.4478134110787173, "grad_norm": 0.1764877438545227, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 215153894.0, "reward": 0.578125, "reward_std": 0.17141534388065338, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 369 }, { "clip_ratio/high_max": 0.0019431889377301559, "clip_ratio/high_mean": 0.0007811395007593092, "clip_ratio/low_mean": 0.0004999987040719134, "clip_ratio/low_min": 2.7250073799223173e-05, "clip_ratio/region_mean": 0.0012811382148356643, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 759.497802734375, "completions/mean_terminated_length": 583.0669555664062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.4571428571428573, "grad_norm": 0.1896611750125885, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 215735948.0, "reward": 0.582589328289032, "reward_std": 0.15631386637687683, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 370 }, { "clip_ratio/high_max": 0.001472057312639663, "clip_ratio/high_mean": 0.0005646050176437711, "clip_ratio/low_mean": 0.0006057319351384649, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011703369455062784, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 637.8939819335938, "completions/mean_terminated_length": 522.2249145507812, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.466472303206997, "grad_norm": 0.19203650951385498, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 216273925.0, "reward": 0.6473214626312256, "reward_std": 0.17930956184864044, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 371 }, { "clip_ratio/high_max": 0.002444472564093303, "clip_ratio/high_mean": 0.0008564699528506026, "clip_ratio/low_mean": 0.0005713572008971823, "clip_ratio/low_min": 5.474257341120392e-05, "clip_ratio/region_mean": 0.0014278271701186895, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3622.0, "completions/mean_length": 741.2332763671875, "completions/mean_terminated_length": 567.982421875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 3.4758017492711373, "grad_norm": 0.19386622309684753, "learning_rate": 1e-06, "loss": -0.014, "num_tokens": 216853838.0, "reward": 0.6004464626312256, "reward_std": 0.18768569827079773, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 372 }, { "clip_ratio/high_max": 0.001838492895331001, "clip_ratio/high_mean": 0.0006286235993684386, "clip_ratio/low_mean": 0.0004937572757626185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011223808833165094, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3755.0, "completions/mean_length": 713.2857666015625, "completions/mean_terminated_length": 555.2149658203125, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 3.485131195335277, "grad_norm": 0.16348233819007874, "learning_rate": 1e-06, "loss": 0.0125, "num_tokens": 217418470.0, "reward": 0.6350446939468384, "reward_std": 0.13917727768421173, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 373 }, { "clip_ratio/high_max": 0.0015413453347719042, "clip_ratio/high_mean": 0.0005610847715615819, "clip_ratio/low_mean": 0.0005258913606667193, "clip_ratio/low_min": 2.9050039302092046e-05, "clip_ratio/region_mean": 0.0010869761172216386, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 715.450927734375, "completions/mean_terminated_length": 553.3427124023438, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 3.494460641399417, "grad_norm": 0.18832284212112427, "learning_rate": 1e-06, "loss": -0.0157, "num_tokens": 217989162.0, "reward": 0.606026828289032, "reward_std": 0.16307058930397034, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 374 }, { "clip_ratio/high_max": 0.0014378242558450438, "clip_ratio/high_mean": 0.0004859341643168591, "clip_ratio/low_mean": 0.0004664748057621182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009524089491605991, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 721.2176513671875, "completions/mean_terminated_length": 559.385986328125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 3.503790087463557, "grad_norm": 0.16173528134822845, "learning_rate": 1e-06, "loss": -0.0233, "num_tokens": 218545413.0, "reward": 0.5647321939468384, "reward_std": 0.13504233956336975, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 375 }, { "clip_ratio/high_max": 0.0018496222655812744, "clip_ratio/high_mean": 0.0007505446428694995, "clip_ratio/low_mean": 0.0005054371913502109, "clip_ratio/low_min": 6.251193826756207e-05, "clip_ratio/region_mean": 0.0012559818023873959, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 734.880615234375, "completions/mean_terminated_length": 602.3074340820312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 3.513119533527697, "grad_norm": 0.18296431005001068, "learning_rate": 1e-06, "loss": -0.0127, "num_tokens": 219153018.0, "reward": 0.6484375, "reward_std": 0.1685284972190857, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 376 }, { "clip_ratio/high_max": 0.00206367682403652, "clip_ratio/high_mean": 0.0007036374481685925, "clip_ratio/low_mean": 0.000642717122900649, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013463545838021673, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 760.6295166015625, "completions/mean_terminated_length": 600.6876831054688, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 3.522448979591837, "grad_norm": 0.1939503252506256, "learning_rate": 1e-06, "loss": -0.0124, "num_tokens": 219762902.0, "reward": 0.5993303656578064, "reward_std": 0.19659315049648285, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 377 }, { "clip_ratio/high_max": 0.001997565930651035, "clip_ratio/high_mean": 0.0006845299285487272, "clip_ratio/low_mean": 0.0004570088676700834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011415387962188106, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 773.9631958007812, "completions/mean_terminated_length": 577.6253051757812, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 3.5317784256559768, "grad_norm": 0.1786443591117859, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 220346429.0, "reward": 0.5948660969734192, "reward_std": 0.16927777230739594, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 378 }, { "clip_ratio/high_max": 0.0020954741012246814, "clip_ratio/high_mean": 0.000722040294931503, "clip_ratio/low_mean": 0.0006658451802650234, "clip_ratio/low_min": 1.5451174476766028e-05, "clip_ratio/region_mean": 0.0013878854806534946, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3858.0, "completions/mean_length": 703.0636596679688, "completions/mean_terminated_length": 561.03369140625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 3.5411078717201168, "grad_norm": 0.21019017696380615, "learning_rate": 1e-06, "loss": -0.0123, "num_tokens": 220913558.0, "reward": 0.6495535969734192, "reward_std": 0.17235782742500305, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.47737622261047363, "step": 379 }, { "clip_ratio/high_max": 0.0021704917089664377, "clip_ratio/high_mean": 0.0008289779198094038, "clip_ratio/low_mean": 0.0005503197139660188, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013792976242257282, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 758.1506958007812, "completions/mean_terminated_length": 556.6946411132812, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 3.5504373177842563, "grad_norm": 0.1875956654548645, "learning_rate": 1e-06, "loss": -0.013, "num_tokens": 221484045.0, "reward": 0.5881696939468384, "reward_std": 0.16822652518749237, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 380 }, { "clip_ratio/high_max": 0.0017003353750624228, "clip_ratio/high_mean": 0.0006937202306289691, "clip_ratio/low_mean": 0.0005314323825587053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012251526059117168, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3164.0, "completions/mean_length": 674.8828125, "completions/mean_terminated_length": 539.943115234375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 3.5597667638483967, "grad_norm": 0.19282668828964233, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 222032708.0, "reward": 0.6852678656578064, "reward_std": 0.17795519530773163, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 381 }, { "clip_ratio/high_max": 0.002126079736626707, "clip_ratio/high_mean": 0.0008324973296112148, "clip_ratio/low_mean": 0.0005921694246353582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014246667305997107, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1797.0, "completions/mean_length": 735.1629638671875, "completions/mean_terminated_length": 528.09716796875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 3.5690962099125363, "grad_norm": 0.20518554747104645, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 222569254.0, "reward": 0.6104910969734192, "reward_std": 0.18193930387496948, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 382 }, { "clip_ratio/high_max": 0.0016663078095007222, "clip_ratio/high_mean": 0.0006392496961780125, "clip_ratio/low_mean": 0.0004713299344984989, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011105796256742906, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3017.0, "completions/mean_length": 669.7098388671875, "completions/mean_terminated_length": 551.0161743164062, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 3.5784256559766763, "grad_norm": 0.182663694024086, "learning_rate": 1e-06, "loss": -0.0088, "num_tokens": 223142442.0, "reward": 0.629464328289032, "reward_std": 0.1594713181257248, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 383 }, { "clip_ratio/high_max": 0.0018950456214952283, "clip_ratio/high_mean": 0.0006564268132933648, "clip_ratio/low_mean": 0.0005129431101522641, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011693699016177561, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1848.0, "completions/mean_length": 691.3158569335938, "completions/mean_terminated_length": 519.6846313476562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.5877551020408163, "grad_norm": 0.17683209478855133, "learning_rate": 1e-06, "loss": -0.0094, "num_tokens": 223679253.0, "reward": 0.6506696939468384, "reward_std": 0.14597675204277039, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 384 }, { "clip_ratio/high_max": 0.0014951238190406002, "clip_ratio/high_mean": 0.0005862280304427259, "clip_ratio/low_mean": 0.0006051687805666006, "clip_ratio/low_min": 2.9754819479421712e-05, "clip_ratio/region_mean": 0.0011913968191947788, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 675.0692138671875, "completions/mean_terminated_length": 536.0069580078125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.5970845481049563, "grad_norm": 0.1987566351890564, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 224234027.0, "reward": 0.606026828289032, "reward_std": 0.15458042919635773, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 385 }, { "clip_ratio/high_max": 0.001807525932235876, "clip_ratio/high_mean": 0.0007027957963146036, "clip_ratio/low_mean": 0.0005485896253958344, "clip_ratio/low_min": 2.8737048523908015e-05, "clip_ratio/region_mean": 0.0012513854071585229, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 698.5178833007812, "completions/mean_terminated_length": 560.4088134765625, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 3.6064139941690962, "grad_norm": 0.169415682554245, "learning_rate": 1e-06, "loss": -0.0158, "num_tokens": 224799827.0, "reward": 0.5803571939468384, "reward_std": 0.17153297364711761, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 386 }, { "clip_ratio/high_max": 0.0017395130635122769, "clip_ratio/high_mean": 0.0005787375794170657, "clip_ratio/low_mean": 0.0004144340646234923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009931716522260103, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 700.8873291015625, "completions/mean_terminated_length": 575.1423950195312, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 3.6157434402332362, "grad_norm": 0.1567336469888687, "learning_rate": 1e-06, "loss": -0.0163, "num_tokens": 225387134.0, "reward": 0.6640625, "reward_std": 0.1749839186668396, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 387 }, { "clip_ratio/high_max": 0.0016087754302134272, "clip_ratio/high_mean": 0.0006124790961621329, "clip_ratio/low_mean": 0.0005558429238590179, "clip_ratio/low_min": 2.7581641916185617e-05, "clip_ratio/region_mean": 0.0011683220400300343, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2574.0, "completions/mean_length": 723.3928833007812, "completions/mean_terminated_length": 569.9136352539062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 3.6250728862973762, "grad_norm": 0.1619538515806198, "learning_rate": 1e-06, "loss": -0.0034, "num_tokens": 225964886.0, "reward": 0.566964328289032, "reward_std": 0.18163184821605682, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 388 }, { "clip_ratio/high_max": 0.0016635123356536496, "clip_ratio/high_mean": 0.0006454615149777965, "clip_ratio/low_mean": 0.0005227474130151677, "clip_ratio/low_min": 1.8667860786081292e-05, "clip_ratio/region_mean": 0.0011682089389069006, "completions/clipped_ratio": 0.024553571428571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 595.5178833007812, "completions/mean_terminated_length": 507.4049987792969, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 3.6344023323615158, "grad_norm": 0.1991020143032074, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 226497678.0, "reward": 0.652901828289032, "reward_std": 0.16085928678512573, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 389 }, { "clip_ratio/high_max": 0.002146697115676943, "clip_ratio/high_mean": 0.0007646301892236806, "clip_ratio/low_mean": 0.0006863439302833285, "clip_ratio/low_min": 5.6588668485346716e-05, "clip_ratio/region_mean": 0.001450974123144988, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2704.0, "completions/mean_length": 743.0045166015625, "completions/mean_terminated_length": 578.10302734375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 3.643731778425656, "grad_norm": 0.19823414087295532, "learning_rate": 1e-06, "loss": -0.0203, "num_tokens": 227071610.0, "reward": 0.559151828289032, "reward_std": 0.1804393231868744, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 390 }, { "clip_ratio/high_max": 0.001862110133515671, "clip_ratio/high_mean": 0.0006820079397584777, "clip_ratio/low_mean": 0.0004486233756324509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011306313062959816, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3934.0, "completions/mean_length": 705.8560791015625, "completions/mean_terminated_length": 568.0452880859375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 3.6530612244897958, "grad_norm": 0.17820972204208374, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 227647241.0, "reward": 0.652901828289032, "reward_std": 0.1803637593984604, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 391 }, { "clip_ratio/high_max": 0.001610847579286201, "clip_ratio/high_mean": 0.0005727976622438291, "clip_ratio/low_mean": 0.00037567990921161254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009484775582677685, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3027.0, "completions/mean_length": 757.7767944335938, "completions/mean_terminated_length": 572.9752807617188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.6623906705539357, "grad_norm": 0.15047359466552734, "learning_rate": 1e-06, "loss": -0.013, "num_tokens": 228232681.0, "reward": 0.5792410969734192, "reward_std": 0.12238264083862305, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 392 }, { "clip_ratio/high_max": 0.0017563890069141053, "clip_ratio/high_mean": 0.0006777052003599238, "clip_ratio/low_mean": 0.00045894521463196725, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011366504077159334, "completions/clipped_ratio": 0.0345982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3657.0, "completions/mean_length": 663.0535888671875, "completions/mean_terminated_length": 540.0231323242188, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 3.6717201166180757, "grad_norm": 0.17460283637046814, "learning_rate": 1e-06, "loss": -0.0155, "num_tokens": 228794985.0, "reward": 0.6629464626312256, "reward_std": 0.16337549686431885, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 393 }, { "clip_ratio/high_max": 0.0020423363166628405, "clip_ratio/high_mean": 0.0008425131418334786, "clip_ratio/low_mean": 0.0005316771175785107, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013741902839683462, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 709.935302734375, "completions/mean_terminated_length": 572.2903442382812, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 3.6810495626822157, "grad_norm": 0.20186789333820343, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 229380255.0, "reward": 0.6127232313156128, "reward_std": 0.17036226391792297, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 394 }, { "clip_ratio/high_max": 0.0013513179546862375, "clip_ratio/high_mean": 0.0005541715963772731, "clip_ratio/low_mean": 0.0005126830387780501, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001066854631062597, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 734.099365234375, "completions/mean_terminated_length": 589.291015625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 3.6903790087463557, "grad_norm": 0.17814481258392334, "learning_rate": 1e-06, "loss": -0.0059, "num_tokens": 229970680.0, "reward": 0.5703125, "reward_std": 0.16191193461418152, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 395 }, { "clip_ratio/high_max": 0.0018220946731162257, "clip_ratio/high_mean": 0.0007106859047780745, "clip_ratio/low_mean": 0.0006099286265452974, "clip_ratio/low_min": 3.661535811261274e-05, "clip_ratio/region_mean": 0.0013206145376898348, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 713.458740234375, "completions/mean_terminated_length": 547.1041870117188, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.6997084548104957, "grad_norm": 0.2087281048297882, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 230526851.0, "reward": 0.6049107313156128, "reward_std": 0.1885194182395935, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 396 }, { "clip_ratio/high_max": 0.0017975419541471638, "clip_ratio/high_mean": 0.0007312477719096933, "clip_ratio/low_mean": 0.0007087690391927026, "clip_ratio/low_min": 1.3766520169156138e-05, "clip_ratio/region_mean": 0.0014400167638086714, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3790.0, "completions/mean_length": 660.8460083007812, "completions/mean_terminated_length": 521.20556640625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 3.7090379008746357, "grad_norm": 0.25244954228401184, "learning_rate": 1e-06, "loss": -0.0191, "num_tokens": 231061969.0, "reward": 0.6238839626312256, "reward_std": 0.2110590636730194, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 397 }, { "clip_ratio/high_max": 0.0019930917042074725, "clip_ratio/high_mean": 0.0007006300147622824, "clip_ratio/low_mean": 0.0005934465352765983, "clip_ratio/low_min": 1.5621095371898264e-05, "clip_ratio/region_mean": 0.0012940765591338277, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2275.0, "completions/mean_length": 792.5078735351562, "completions/mean_terminated_length": 584.81494140625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.7183673469387752, "grad_norm": 0.1891532838344574, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 231645584.0, "reward": 0.5636160969734192, "reward_std": 0.17934459447860718, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 398 }, { "clip_ratio/high_max": 0.0015383086665679002, "clip_ratio/high_mean": 0.0005625836811304907, "clip_ratio/low_mean": 0.0005180119342185208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010805956153490115, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 1958.0, "completions/mean_length": 624.609375, "completions/mean_terminated_length": 512.6290283203125, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.7276967930029157, "grad_norm": 0.3193029463291168, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 232170738.0, "reward": 0.684151828289032, "reward_std": 0.1433078944683075, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 399 }, { "clip_ratio/high_max": 0.001933300263772253, "clip_ratio/high_mean": 0.0007834839052520692, "clip_ratio/low_mean": 0.0006117325256127515, "clip_ratio/low_min": 1.5184645235422067e-05, "clip_ratio/region_mean": 0.0013952164445072412, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3498.0, "completions/mean_length": 702.0703735351562, "completions/mean_terminated_length": 559.9988403320312, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 3.7370262390670552, "grad_norm": 0.19785086810588837, "learning_rate": 1e-06, "loss": -0.0234, "num_tokens": 232732321.0, "reward": 0.6785714626312256, "reward_std": 0.1814924031496048, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 400 }, { "clip_ratio/high_max": 0.0018581328040454537, "clip_ratio/high_mean": 0.0007588266589664272, "clip_ratio/low_mean": 0.00046630068300146377, "clip_ratio/low_min": 1.549330772832036e-05, "clip_ratio/region_mean": 0.001225127351062838, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3813.0, "completions/mean_length": 707.1842041015625, "completions/mean_terminated_length": 561.216552734375, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 3.746355685131195, "grad_norm": 0.18901850283145905, "learning_rate": 1e-06, "loss": 0.0142, "num_tokens": 233296070.0, "reward": 0.668526828289032, "reward_std": 0.1643964797258377, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 401 }, { "clip_ratio/high_max": 0.0014970728334446903, "clip_ratio/high_mean": 0.0005708990856874152, "clip_ratio/low_mean": 0.0003868023632094264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009577014643582515, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 673.4609375, "completions/mean_terminated_length": 575.2250366210938, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.755685131195335, "grad_norm": 0.1590486615896225, "learning_rate": 1e-06, "loss": -0.0047, "num_tokens": 233882291.0, "reward": 0.6517857313156128, "reward_std": 0.13151763379573822, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 402 }, { "clip_ratio/high_max": 0.001991315984923858, "clip_ratio/high_mean": 0.0007662270454602549, "clip_ratio/low_mean": 0.0005122620159454527, "clip_ratio/low_min": 1.4236901733966079e-05, "clip_ratio/region_mean": 0.001278489078686107, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 701.0078735351562, "completions/mean_terminated_length": 563.0, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.765014577259475, "grad_norm": 0.28080618381500244, "learning_rate": 1e-06, "loss": -0.0216, "num_tokens": 234447778.0, "reward": 0.6305803656578064, "reward_std": 0.1858833283185959, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 403 }, { "clip_ratio/high_max": 0.0014330903650261462, "clip_ratio/high_mean": 0.0005325768388502183, "clip_ratio/low_mean": 0.00043402173287176993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009665985562605783, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3895.0, "completions/mean_length": 805.9397583007812, "completions/mean_terminated_length": 582.4195556640625, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 3.774344023323615, "grad_norm": 0.17053110897541046, "learning_rate": 1e-06, "loss": -0.0198, "num_tokens": 235021012.0, "reward": 0.6506696939468384, "reward_std": 0.12610390782356262, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 404 }, { "clip_ratio/high_max": 0.0013069679553154856, "clip_ratio/high_mean": 0.0005528732399397995, "clip_ratio/low_mean": 0.0005487712951435242, "clip_ratio/low_min": 2.5991384973167442e-05, "clip_ratio/region_mean": 0.0011016445241693873, "completions/clipped_ratio": 0.033482142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 703.6641235351562, "completions/mean_terminated_length": 586.1466674804688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.783673469387755, "grad_norm": 0.17060096561908722, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 235629927.0, "reward": 0.5915178656578064, "reward_std": 0.16296502947807312, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 405 }, { "clip_ratio/high_max": 0.0020681845235230867, "clip_ratio/high_mean": 0.0006885394050186733, "clip_ratio/low_mean": 0.00044127773708169116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011298171448288485, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3542.0, "completions/mean_length": 738.8973388671875, "completions/mean_terminated_length": 540.4869995117188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 3.793002915451895, "grad_norm": 0.1886061578989029, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 236167043.0, "reward": 0.5870535969734192, "reward_std": 0.15930767357349396, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263837933540344, "step": 406 }, { "clip_ratio/high_max": 0.001883883582195267, "clip_ratio/high_mean": 0.000595442439589533, "clip_ratio/low_mean": 0.000400367771362653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009958102054952178, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 708.7444458007812, "completions/mean_terminated_length": 579.2201538085938, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 3.8023323615160347, "grad_norm": 0.1705055981874466, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 236748742.0, "reward": 0.6618303656578064, "reward_std": 0.15382862091064453, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 407 }, { "clip_ratio/high_max": 0.0020897837785014417, "clip_ratio/high_mean": 0.0007634654139110353, "clip_ratio/low_mean": 0.0004084199117642129, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011718853420461528, "completions/clipped_ratio": 0.041294642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2886.0, "completions/mean_length": 670.654052734375, "completions/mean_terminated_length": 523.1129150390625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 3.811661807580175, "grad_norm": 0.18541960418224335, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 237277168.0, "reward": 0.6428571939468384, "reward_std": 0.138014018535614, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 408 }, { "clip_ratio/high_max": 0.001379933954012813, "clip_ratio/high_mean": 0.0005394819563662168, "clip_ratio/low_mean": 0.0004734645453936537, "clip_ratio/low_min": 1.4165911125019193e-05, "clip_ratio/region_mean": 0.0010129465135833016, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3088.0, "completions/mean_length": 872.3069458007812, "completions/mean_terminated_length": 615.965087890625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 3.8209912536443147, "grad_norm": 0.16116270422935486, "learning_rate": 1e-06, "loss": -0.0308, "num_tokens": 237874883.0, "reward": 0.5714285969734192, "reward_std": 0.1557818502187729, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 409 }, { "clip_ratio/high_max": 0.001288067122004577, "clip_ratio/high_mean": 0.0004187676495348569, "clip_ratio/low_mean": 0.00037481185336218914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000793579518358456, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 756.2656860351562, "completions/mean_terminated_length": 571.3804931640625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 3.8303206997084547, "grad_norm": 0.12468872219324112, "learning_rate": 1e-06, "loss": -0.0131, "num_tokens": 238451505.0, "reward": 0.5602678656578064, "reward_std": 0.10521144419908524, "rewards/verify_math_reward/mean": 0.5602678656578064, "rewards/verify_math_reward/std": 0.4966317117214203, "step": 410 }, { "clip_ratio/high_max": 0.0015861556566960644, "clip_ratio/high_mean": 0.0005598666220976156, "clip_ratio/low_mean": 0.0004007624011137523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000960629044129746, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2859.0, "completions/mean_length": 749.4386596679688, "completions/mean_terminated_length": 534.8135375976562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 3.8396501457725947, "grad_norm": 0.17238463461399078, "learning_rate": 1e-06, "loss": -0.0163, "num_tokens": 238986634.0, "reward": 0.6428571939468384, "reward_std": 0.1530490666627884, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 411 }, { "clip_ratio/high_max": 0.0016027354431571439, "clip_ratio/high_mean": 0.0006036079685145523, "clip_ratio/low_mean": 0.000477300554393878, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010809085179062095, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 752.7120971679688, "completions/mean_terminated_length": 555.1182250976562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 3.8489795918367347, "grad_norm": 0.17312484979629517, "learning_rate": 1e-06, "loss": -0.0068, "num_tokens": 239540840.0, "reward": 0.6082589626312256, "reward_std": 0.1429741084575653, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 412 }, { "clip_ratio/high_max": 0.0014972918725106865, "clip_ratio/high_mean": 0.0005365095371416828, "clip_ratio/low_mean": 0.00033137091645585315, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000867880463374604, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3277.0, "completions/mean_length": 702.779052734375, "completions/mean_terminated_length": 497.9810485839844, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 3.8583090379008746, "grad_norm": 0.16142456233501434, "learning_rate": 1e-06, "loss": -0.0118, "num_tokens": 240040914.0, "reward": 0.707589328289032, "reward_std": 0.1360180377960205, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 413 }, { "clip_ratio/high_max": 0.0019075731761404313, "clip_ratio/high_mean": 0.0007231353165479959, "clip_ratio/low_mean": 0.0005526428140001372, "clip_ratio/low_min": 1.667555989115499e-05, "clip_ratio/region_mean": 0.0012757781296386383, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3487.0, "completions/mean_length": 689.9252319335938, "completions/mean_terminated_length": 563.7742919921875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 3.8676384839650146, "grad_norm": 0.21427814662456512, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 240623687.0, "reward": 0.6149553656578064, "reward_std": 0.17104512453079224, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 414 }, { "clip_ratio/high_max": 0.001695656195806805, "clip_ratio/high_mean": 0.0006169801290525356, "clip_ratio/low_mean": 0.0005569700933847344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001173950240627164, "completions/clipped_ratio": 0.0279017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2344.0, "completions/mean_length": 608.1953125, "completions/mean_terminated_length": 508.0860900878906, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 3.8769679300291546, "grad_norm": 0.18354754149913788, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 241150190.0, "reward": 0.6729910969734192, "reward_std": 0.1443602740764618, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 415 }, { "clip_ratio/high_max": 0.002135642702342011, "clip_ratio/high_mean": 0.0007123456252884353, "clip_ratio/low_mean": 0.0005532885870707105, "clip_ratio/low_min": 1.5617191820638254e-05, "clip_ratio/region_mean": 0.001265634193259757, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 834.4263916015625, "completions/mean_terminated_length": 591.959228515625, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 3.8862973760932946, "grad_norm": 0.18390852212905884, "learning_rate": 1e-06, "loss": -0.0231, "num_tokens": 241739988.0, "reward": 0.5546875, "reward_std": 0.17618604004383087, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 416 }, { "clip_ratio/high_max": 0.0016206954096560366, "clip_ratio/high_mean": 0.0007030477354419418, "clip_ratio/low_mean": 0.000554286551050609, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012573342792165931, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3834.0, "completions/mean_length": 764.5960083007812, "completions/mean_terminated_length": 576.0259399414062, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 3.8956268221574346, "grad_norm": 0.2396124005317688, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 242321666.0, "reward": 0.6004464626312256, "reward_std": 0.17615465819835663, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 417 }, { "clip_ratio/high_max": 0.001999508596782107, "clip_ratio/high_mean": 0.0007815576373104705, "clip_ratio/low_mean": 0.0003721101156770601, "clip_ratio/low_min": 1.0111632036569063e-05, "clip_ratio/region_mean": 0.0011536677702679299, "completions/clipped_ratio": 0.0357142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 677.3549194335938, "completions/mean_terminated_length": 550.7384033203125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 3.904956268221574, "grad_norm": 0.1902942657470703, "learning_rate": 1e-06, "loss": -0.0215, "num_tokens": 242889016.0, "reward": 0.6383928656578064, "reward_std": 0.14943771064281464, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 418 }, { "clip_ratio/high_max": 0.002159133549866965, "clip_ratio/high_mean": 0.000910751932678977, "clip_ratio/low_mean": 0.0006056313368389965, "clip_ratio/low_min": 1.3787778698315378e-05, "clip_ratio/region_mean": 0.0015163832795224153, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 792.075927734375, "completions/mean_terminated_length": 609.1731567382812, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 3.914285714285714, "grad_norm": 0.2009381502866745, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 243491412.0, "reward": 0.6194196939468384, "reward_std": 0.21451324224472046, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 419 }, { "clip_ratio/high_max": 0.0014295754899649182, "clip_ratio/high_mean": 0.0005785217995253333, "clip_ratio/low_mean": 0.0005807519701193087, "clip_ratio/low_min": 1.7327418390777893e-05, "clip_ratio/region_mean": 0.0011592737682804, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 1833.0, "completions/mean_length": 684.1674194335938, "completions/mean_terminated_length": 507.969482421875, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.923615160349854, "grad_norm": 0.19341816008090973, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 244012114.0, "reward": 0.6774553656578064, "reward_std": 0.15634779632091522, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 420 }, { "clip_ratio/high_max": 0.001651230275456328, "clip_ratio/high_mean": 0.0006677539759039064, "clip_ratio/low_mean": 0.0006492672764579765, "clip_ratio/low_min": 2.4468825358781032e-05, "clip_ratio/region_mean": 0.001317021255090367, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 802.0904541015625, "completions/mean_terminated_length": 565.6853637695312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 3.932944606413994, "grad_norm": 0.1774033010005951, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 244578291.0, "reward": 0.574776828289032, "reward_std": 0.17468081414699554, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 421 }, { "clip_ratio/high_max": 0.001811074347642716, "clip_ratio/high_mean": 0.0006715565041304217, "clip_ratio/low_mean": 0.0005162078393823322, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011877643264597282, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3190.0, "completions/mean_length": 793.2701416015625, "completions/mean_terminated_length": 581.4560546875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 3.942274052478134, "grad_norm": 0.19825774431228638, "learning_rate": 1e-06, "loss": -0.0065, "num_tokens": 245153885.0, "reward": 0.6026785969734192, "reward_std": 0.16709676384925842, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 422 }, { "clip_ratio/high_max": 0.001544920240121428, "clip_ratio/high_mean": 0.0006404981068044435, "clip_ratio/low_mean": 0.0004122852078580763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010527833328524139, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3552.0, "completions/mean_length": 747.935302734375, "completions/mean_terminated_length": 579.1582641601562, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.951603498542274, "grad_norm": 0.16005107760429382, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 245739067.0, "reward": 0.6004464626312256, "reward_std": 0.13319942355155945, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 423 }, { "clip_ratio/high_max": 0.001829434411774855, "clip_ratio/high_mean": 0.0006740614317095606, "clip_ratio/low_mean": 0.00046427523557213135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011383366436348297, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3272.0, "completions/mean_length": 704.7767944335938, "completions/mean_terminated_length": 512.8207397460938, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.960932944606414, "grad_norm": 0.1984466165304184, "learning_rate": 1e-06, "loss": -0.005, "num_tokens": 246256451.0, "reward": 0.6573660969734192, "reward_std": 0.15744110941886902, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 424 }, { "clip_ratio/high_max": 0.0019211695980629884, "clip_ratio/high_mean": 0.0006977131324674701, "clip_ratio/low_mean": 0.0005139370568940649, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012116501566197257, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3219.0, "completions/mean_length": 736.3538208007812, "completions/mean_terminated_length": 562.8509521484375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 3.970262390670554, "grad_norm": 0.18405428528785706, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 246824592.0, "reward": 0.6205357313156128, "reward_std": 0.17325936257839203, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 425 }, { "clip_ratio/high_max": 0.002113569153152639, "clip_ratio/high_mean": 0.0007154547784011811, "clip_ratio/low_mean": 0.0005296306944728713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012450854774215259, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3477.0, "completions/mean_length": 678.3248291015625, "completions/mean_terminated_length": 526.959228515625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 3.979591836734694, "grad_norm": 0.19453385472297668, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 247367003.0, "reward": 0.6439732313156128, "reward_std": 0.1684875339269638, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 426 }, { "clip_ratio/high_max": 0.0016110108535940526, "clip_ratio/high_mean": 0.0006107969102231436, "clip_ratio/low_mean": 0.00043135071882716147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010421476290503051, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3254.0, "completions/mean_length": 754.3516235351562, "completions/mean_terminated_length": 548.468017578125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 3.9889212827988336, "grad_norm": 0.16089829802513123, "learning_rate": 1e-06, "loss": -0.0195, "num_tokens": 247920278.0, "reward": 0.6696428656578064, "reward_std": 0.13230928778648376, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 427 }, { "clip_ratio/high_max": 0.0024735098049859516, "clip_ratio/high_mean": 0.0009925615377142094, "clip_ratio/low_mean": 0.0007235122666315874, "clip_ratio/low_min": 3.9696005842415616e-05, "clip_ratio/region_mean": 0.0017160737843369134, "completions/clipped_ratio": 0.03693181818181823, "completions/max_length": 4096.0, "completions/max_terminated_length": 3544.0, "completions/mean_length": 668.04833984375, "completions/mean_terminated_length": 536.5928955078125, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 3.9982507288629736, "grad_norm": 0.21787001192569733, "learning_rate": 1e-06, "loss": -0.0179, "num_tokens": 248463154.0, "reward": 0.6774553656578064, "reward_std": 0.18806587159633636, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 428 }, { "clip_ratio/high_max": 0.001550124095956562, "clip_ratio/high_mean": 0.0005623620854748879, "clip_ratio/low_mean": 0.0005324066460161703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010947687478619628, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3375.0, "completions/mean_length": 734.4710083007812, "completions/mean_terminated_length": 527.362548828125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.0093294460641395, "grad_norm": 0.17575211822986603, "learning_rate": 1e-06, "loss": -0.0242, "num_tokens": 249002344.0, "reward": 0.660714328289032, "reward_std": 0.1356828510761261, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 429 }, { "clip_ratio/high_max": 0.0018280293879797682, "clip_ratio/high_mean": 0.0006028579900885234, "clip_ratio/low_mean": 0.0006333552573778434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012362132583803032, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 697.0435791015625, "completions/mean_terminated_length": 554.7615966796875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 4.01865889212828, "grad_norm": 0.17597052454948425, "learning_rate": 1e-06, "loss": -0.0081, "num_tokens": 249564175.0, "reward": 0.6127232313156128, "reward_std": 0.1612725704908371, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 430 }, { "clip_ratio/high_max": 0.0018020729767158628, "clip_ratio/high_mean": 0.0007306219995371066, "clip_ratio/low_mean": 0.00048518850417167414, "clip_ratio/low_min": 3.35315507982159e-05, "clip_ratio/region_mean": 0.0012158105237176642, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 736.1830444335938, "completions/mean_terminated_length": 575.0689697265625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 4.0279883381924195, "grad_norm": 0.18957768380641937, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 250134251.0, "reward": 0.629464328289032, "reward_std": 0.16491781175136566, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 431 }, { "clip_ratio/high_max": 0.0016300738425343297, "clip_ratio/high_mean": 0.00061281691705517, "clip_ratio/low_mean": 0.0005833730356243905, "clip_ratio/low_min": 1.642143979552202e-05, "clip_ratio/region_mean": 0.0011961899435846135, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 731.4297485351562, "completions/mean_terminated_length": 598.7203979492188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.03731778425656, "grad_norm": 0.1931212693452835, "learning_rate": 1e-06, "loss": -0.0074, "num_tokens": 250736332.0, "reward": 0.6584821939468384, "reward_std": 0.18986491858959198, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 432 }, { "clip_ratio/high_max": 0.001458179725887021, "clip_ratio/high_mean": 0.0005024361425967072, "clip_ratio/low_mean": 0.00032729567737987963, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008297318090626504, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 832.6920166015625, "completions/mean_terminated_length": 606.83056640625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 4.0466472303206995, "grad_norm": 0.1558295339345932, "learning_rate": 1e-06, "loss": -0.0039, "num_tokens": 251329912.0, "reward": 0.5859375, "reward_std": 0.13185352087020874, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 433 }, { "clip_ratio/high_max": 0.0023373076001007576, "clip_ratio/high_mean": 0.0009709448768262519, "clip_ratio/low_mean": 0.0007500772353523644, "clip_ratio/low_min": 4.23151723225601e-05, "clip_ratio/region_mean": 0.0017210221267305315, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 896.3917846679688, "completions/mean_terminated_length": 587.0048828125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 4.05597667638484, "grad_norm": 0.2019355148077011, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 251909367.0, "reward": 0.6004464626312256, "reward_std": 0.2145560085773468, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 434 }, { "clip_ratio/high_max": 0.0020325570585555397, "clip_ratio/high_mean": 0.0007336153103096876, "clip_ratio/low_mean": 0.00047519568897769204, "clip_ratio/low_min": 2.19606463360833e-05, "clip_ratio/region_mean": 0.0012088110015611164, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2026.0, "completions/mean_length": 680.1361694335938, "completions/mean_terminated_length": 499.5087890625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 4.0653061224489795, "grad_norm": 0.1919316202402115, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 252418449.0, "reward": 0.6718750596046448, "reward_std": 0.1665661334991455, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 435 }, { "clip_ratio/high_max": 0.0017925076244864613, "clip_ratio/high_mean": 0.000628407547083043, "clip_ratio/low_mean": 0.0003784722875934676, "clip_ratio/low_min": 1.4898689187248237e-05, "clip_ratio/region_mean": 0.0010068798219435848, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2798.0, "completions/mean_length": 791.8348388671875, "completions/mean_terminated_length": 546.2014770507812, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 4.07463556851312, "grad_norm": 0.1585756242275238, "learning_rate": 1e-06, "loss": -0.0252, "num_tokens": 252964285.0, "reward": 0.6629464626312256, "reward_std": 0.13455379009246826, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 436 }, { "clip_ratio/high_max": 0.0019483662836137228, "clip_ratio/high_mean": 0.0008094949462247314, "clip_ratio/low_mean": 0.0006003248581691878, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00140981977165211, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2295.0, "completions/mean_length": 779.6495971679688, "completions/mean_terminated_length": 545.8804931640625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 4.0839650145772595, "grad_norm": 0.21476420760154724, "learning_rate": 1e-06, "loss": -0.0162, "num_tokens": 253510843.0, "reward": 0.5814732313156128, "reward_std": 0.17577417194843292, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 437 }, { "clip_ratio/high_max": 0.0018718587707553525, "clip_ratio/high_mean": 0.0007202738433988998, "clip_ratio/low_mean": 0.000541594497008191, "clip_ratio/low_min": 2.1724017642554827e-05, "clip_ratio/region_mean": 0.0012618683358596172, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3965.0, "completions/mean_length": 828.8236694335938, "completions/mean_terminated_length": 594.3372802734375, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 4.093294460641399, "grad_norm": 0.19090571999549866, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 254088917.0, "reward": 0.5803571939468384, "reward_std": 0.18577638268470764, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 438 }, { "clip_ratio/high_max": 0.0016446651679871138, "clip_ratio/high_mean": 0.000559275911655277, "clip_ratio/low_mean": 0.00048501389574084897, "clip_ratio/low_min": 1.07499135992839e-05, "clip_ratio/region_mean": 0.001044289798301179, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3640.0, "completions/mean_length": 748.8660888671875, "completions/mean_terminated_length": 600.6246948242188, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 4.1026239067055394, "grad_norm": 0.17010226845741272, "learning_rate": 1e-06, "loss": -0.0055, "num_tokens": 254694853.0, "reward": 0.5803571939468384, "reward_std": 0.16078469157218933, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 439 }, { "clip_ratio/high_max": 0.0017630194415687583, "clip_ratio/high_mean": 0.0006545774831465678, "clip_ratio/low_mean": 0.0005907071354158688, "clip_ratio/low_min": 1.2150077964179218e-05, "clip_ratio/region_mean": 0.0012452845985535532, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 786.872802734375, "completions/mean_terminated_length": 607.7905883789062, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 4.111953352769679, "grad_norm": 0.15964417159557343, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 255293563.0, "reward": 0.5792410969734192, "reward_std": 0.16709743440151215, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 440 }, { "clip_ratio/high_max": 0.0014676659920951352, "clip_ratio/high_mean": 0.0005042846823926084, "clip_ratio/low_mean": 0.0004731673179776408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009774520021892386, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2953.0, "completions/mean_length": 705.7645263671875, "completions/mean_terminated_length": 576.1262817382812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 4.121282798833819, "grad_norm": 0.21414446830749512, "learning_rate": 1e-06, "loss": -0.0131, "num_tokens": 255874480.0, "reward": 0.6808035969734192, "reward_std": 0.14199630916118622, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.46642565727233887, "step": 441 }, { "clip_ratio/high_max": 0.002008291019592434, "clip_ratio/high_mean": 0.0006626217946177348, "clip_ratio/low_mean": 0.0004880354131273634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011506572445796337, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3138.0, "completions/mean_length": 720.8326416015625, "completions/mean_terminated_length": 583.6306762695312, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 4.130612244897959, "grad_norm": 0.1744566708803177, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 256467282.0, "reward": 0.6160714626312256, "reward_std": 0.15849420428276062, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 442 }, { "clip_ratio/high_max": 0.0018489293870516121, "clip_ratio/high_mean": 0.000563837046684057, "clip_ratio/low_mean": 0.00033357295546920795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008974099946499337, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2574.0, "completions/mean_length": 723.3482666015625, "completions/mean_terminated_length": 557.4800415039062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 4.139941690962099, "grad_norm": 0.18050570785999298, "learning_rate": 1e-06, "loss": -0.0082, "num_tokens": 257041730.0, "reward": 0.6361607313156128, "reward_std": 0.12674400210380554, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 443 }, { "clip_ratio/high_max": 0.0014826594124315307, "clip_ratio/high_mean": 0.0005463933921419084, "clip_ratio/low_mean": 0.0005113395827720524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010577329594525509, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2941.0, "completions/mean_length": 640.4096069335938, "completions/mean_terminated_length": 528.93896484375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.149271137026239, "grad_norm": 0.1837472915649414, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 257597953.0, "reward": 0.6305803656578064, "reward_std": 0.149361714720726, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 444 }, { "clip_ratio/high_max": 0.0019662171325762756, "clip_ratio/high_mean": 0.0007879960066929925, "clip_ratio/low_mean": 0.0006120282741903793, "clip_ratio/low_min": 2.2731404897058383e-05, "clip_ratio/region_mean": 0.0014000243027112447, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3948.0, "completions/mean_length": 688.4174194335938, "completions/mean_terminated_length": 549.8977661132812, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.158600583090379, "grad_norm": 0.2023085504770279, "learning_rate": 1e-06, "loss": 0.0079, "num_tokens": 258162423.0, "reward": 0.6618303656578064, "reward_std": 0.1816311627626419, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 445 }, { "clip_ratio/high_max": 0.0019811143902188633, "clip_ratio/high_mean": 0.0007271489575941814, "clip_ratio/low_mean": 0.0004393708222778514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001166519767139107, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 720.2924194335938, "completions/mean_terminated_length": 566.672119140625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.167930029154519, "grad_norm": 0.17384913563728333, "learning_rate": 1e-06, "loss": -0.025, "num_tokens": 258734085.0, "reward": 0.652901828289032, "reward_std": 0.17525240778923035, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 446 }, { "clip_ratio/high_max": 0.001937957677000668, "clip_ratio/high_mean": 0.0007817323348717764, "clip_ratio/low_mean": 0.0006255138932829141, "clip_ratio/low_min": 1.4595982975151855e-05, "clip_ratio/region_mean": 0.001407246232702164, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 771.0078735351562, "completions/mean_terminated_length": 561.9632568359375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 4.1772594752186585, "grad_norm": 0.24516253173351288, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 259298228.0, "reward": 0.6026785969734192, "reward_std": 0.175969198346138, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 447 }, { "clip_ratio/high_max": 0.0022653146806987934, "clip_ratio/high_mean": 0.0007788074835843872, "clip_ratio/low_mean": 0.0004663111021727673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012451185648387764, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3996.0, "completions/mean_length": 714.8750610351562, "completions/mean_terminated_length": 552.7391967773438, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 4.186588921282799, "grad_norm": 0.16899371147155762, "learning_rate": 1e-06, "loss": -0.0336, "num_tokens": 259866924.0, "reward": 0.6462053656578064, "reward_std": 0.15210728347301483, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 448 }, { "clip_ratio/high_max": 0.0015653916780138388, "clip_ratio/high_mean": 0.0006049003204680048, "clip_ratio/low_mean": 0.0005498287810041802, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001154729110567132, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3565.0, "completions/mean_length": 794.388427734375, "completions/mean_terminated_length": 574.2809448242188, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 4.1959183673469385, "grad_norm": 0.17593829333782196, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 260438440.0, "reward": 0.6026785969734192, "reward_std": 0.16360443830490112, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 449 }, { "clip_ratio/high_max": 0.0016926053067436442, "clip_ratio/high_mean": 0.000615798800936318, "clip_ratio/low_mean": 0.0006116657750681043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012274645596335176, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3470.0, "completions/mean_length": 694.6674194335938, "completions/mean_terminated_length": 560.5081176757812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 4.205247813411079, "grad_norm": 0.2083657830953598, "learning_rate": 1e-06, "loss": -0.0093, "num_tokens": 261009934.0, "reward": 0.6272321939468384, "reward_std": 0.17021070420742035, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 450 }, { "clip_ratio/high_max": 0.0014267964179452974, "clip_ratio/high_mean": 0.0005876838677068008, "clip_ratio/low_mean": 0.000573326185985934, "clip_ratio/low_min": 1.8355360225541517e-05, "clip_ratio/region_mean": 0.0011610100373218302, "completions/clipped_ratio": 0.03125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3799.0, "completions/mean_length": 685.5781860351562, "completions/mean_terminated_length": 575.5645141601562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.214577259475218, "grad_norm": 0.17914150655269623, "learning_rate": 1e-06, "loss": -0.0089, "num_tokens": 261601044.0, "reward": 0.6540178656578064, "reward_std": 0.17472361028194427, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 451 }, { "clip_ratio/high_max": 0.0016414470119343605, "clip_ratio/high_mean": 0.0005945230295765214, "clip_ratio/low_mean": 0.00056577136820124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011602943850448355, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3350.0, "completions/mean_length": 781.6707763671875, "completions/mean_terminated_length": 522.4271850585938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.223906705539359, "grad_norm": 0.17691665887832642, "learning_rate": 1e-06, "loss": -0.0425, "num_tokens": 262122149.0, "reward": 0.6026785969734192, "reward_std": 0.16398167610168457, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 452 }, { "clip_ratio/high_max": 0.002144000154657988, "clip_ratio/high_mean": 0.0008388274454773637, "clip_ratio/low_mean": 0.0005876320974493865, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001426459519279888, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3902.0, "completions/mean_length": 814.6842041015625, "completions/mean_terminated_length": 612.517822265625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 4.233236151603498, "grad_norm": 0.200413316488266, "learning_rate": 1e-06, "loss": -0.0175, "num_tokens": 262732994.0, "reward": 0.6082589626312256, "reward_std": 0.18303264677524567, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 453 }, { "clip_ratio/high_max": 0.0018000014970311895, "clip_ratio/high_mean": 0.0007076233650877839, "clip_ratio/low_mean": 0.0004935935685352888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012012169536319561, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3184.0, "completions/mean_length": 737.0692138671875, "completions/mean_terminated_length": 551.121337890625, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.242565597667639, "grad_norm": 0.19994544982910156, "learning_rate": 1e-06, "loss": -0.025, "num_tokens": 263297664.0, "reward": 0.6395089626312256, "reward_std": 0.1640922725200653, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 454 }, { "clip_ratio/high_max": 0.0018318804432055913, "clip_ratio/high_mean": 0.0005455324644572102, "clip_ratio/low_mean": 0.0004073308882652782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009528633672744036, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 713.9207763671875, "completions/mean_terminated_length": 560.010498046875, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.251895043731778, "grad_norm": 0.1690525859594345, "learning_rate": 1e-06, "loss": -0.0173, "num_tokens": 263862577.0, "reward": 0.6674107313156128, "reward_std": 0.1351943016052246, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 455 }, { "clip_ratio/high_max": 0.0020246798230800778, "clip_ratio/high_mean": 0.0007691207392781507, "clip_ratio/low_mean": 0.0006225039333003224, "clip_ratio/low_min": 3.5597511669038795e-05, "clip_ratio/region_mean": 0.001391624642565148, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 730.9766235351562, "completions/mean_terminated_length": 548.869384765625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 4.261224489795918, "grad_norm": 0.1988225132226944, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 264413948.0, "reward": 0.6428571939468384, "reward_std": 0.1742018610239029, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 456 }, { "clip_ratio/high_max": 0.001460505171053228, "clip_ratio/high_mean": 0.0005831749031131039, "clip_ratio/low_mean": 0.00040539355359214824, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009885684430628316, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3958.0, "completions/mean_length": 821.3616333007812, "completions/mean_terminated_length": 565.22265625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 4.270553935860058, "grad_norm": 0.16104991734027863, "learning_rate": 1e-06, "loss": -0.0332, "num_tokens": 264969280.0, "reward": 0.5959821939468384, "reward_std": 0.13429415225982666, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 457 }, { "clip_ratio/high_max": 0.0023104666615836322, "clip_ratio/high_mean": 0.0006289108641794883, "clip_ratio/low_mean": 0.0005199798324611038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011488906711747404, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2340.0, "completions/mean_length": 824.4319458007812, "completions/mean_terminated_length": 581.2218627929688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.279883381924198, "grad_norm": 0.18824583292007446, "learning_rate": 1e-06, "loss": -0.0044, "num_tokens": 265539691.0, "reward": 0.6004464626312256, "reward_std": 0.1331673413515091, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 458 }, { "clip_ratio/high_max": 0.002066777720756363, "clip_ratio/high_mean": 0.0007543249539594399, "clip_ratio/low_mean": 0.0003897678893736156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011440928574302234, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3074.0, "completions/mean_length": 735.521240234375, "completions/mean_terminated_length": 574.3754272460938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 4.289212827988338, "grad_norm": 0.18381187319755554, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 266119558.0, "reward": 0.6350446939468384, "reward_std": 0.1475597620010376, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 459 }, { "clip_ratio/high_max": 0.001597754508111393, "clip_ratio/high_mean": 0.0006955106946406886, "clip_ratio/low_mean": 0.000667080661514774, "clip_ratio/low_min": 1.5632816939614713e-05, "clip_ratio/region_mean": 0.0013625913888972718, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3586.0, "completions/mean_length": 809.3739013671875, "completions/mean_terminated_length": 611.0094604492188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 4.298542274052478, "grad_norm": 0.173160120844841, "learning_rate": 1e-06, "loss": -0.0179, "num_tokens": 266736085.0, "reward": 0.578125, "reward_std": 0.1738220751285553, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 460 }, { "clip_ratio/high_max": 0.0019878382809110917, "clip_ratio/high_mean": 0.0007721334459347418, "clip_ratio/low_mean": 0.0005932776821282459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013654111462528817, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3248.0, "completions/mean_length": 778.5670166015625, "completions/mean_terminated_length": 586.6493530273438, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 4.307871720116618, "grad_norm": 0.18714334070682526, "learning_rate": 1e-06, "loss": -0.0143, "num_tokens": 267324329.0, "reward": 0.6071428656578064, "reward_std": 0.16450455784797668, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 461 }, { "clip_ratio/high_max": 0.002032181990216486, "clip_ratio/high_mean": 0.0007165919905673945, "clip_ratio/low_mean": 0.0004605987501236086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001177190752059687, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2023.0, "completions/mean_length": 649.9788208007812, "completions/mean_terminated_length": 514.0568237304688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 4.317201166180758, "grad_norm": 0.19159138202667236, "learning_rate": 1e-06, "loss": -0.0209, "num_tokens": 267862102.0, "reward": 0.6551339626312256, "reward_std": 0.14188572764396667, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 462 }, { "clip_ratio/high_max": 0.0017651188863965217, "clip_ratio/high_mean": 0.0007189238440332701, "clip_ratio/low_mean": 0.0004800896194865345, "clip_ratio/low_min": 1.402603265887592e-05, "clip_ratio/region_mean": 0.0011990134680672782, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3392.0, "completions/mean_length": 825.5535888671875, "completions/mean_terminated_length": 544.0969848632812, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 4.326530612244898, "grad_norm": 0.1885886788368225, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 268401510.0, "reward": 0.5915178656578064, "reward_std": 0.154136061668396, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 463 }, { "clip_ratio/high_max": 0.0016796603413240518, "clip_ratio/high_mean": 0.000691595454554772, "clip_ratio/low_mean": 0.0004600154034051229, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011516108570504002, "completions/clipped_ratio": 0.0323660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3545.0, "completions/mean_length": 698.9308471679688, "completions/mean_terminated_length": 585.3033447265625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 4.335860058309038, "grad_norm": 0.19632436335086823, "learning_rate": 1e-06, "loss": 0.008, "num_tokens": 268995784.0, "reward": 0.6651785969734192, "reward_std": 0.14196310937404633, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 464 }, { "clip_ratio/high_max": 0.0016694177611498162, "clip_ratio/high_mean": 0.0006217189511517063, "clip_ratio/low_mean": 0.0004182116781521472, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010399306192994118, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 669.4163208007812, "completions/mean_terminated_length": 525.9779052734375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 4.345189504373177, "grad_norm": 0.19902345538139343, "learning_rate": 1e-06, "loss": -0.005, "num_tokens": 269535069.0, "reward": 0.684151828289032, "reward_std": 0.12572552263736725, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 465 }, { "clip_ratio/high_max": 0.0015598318714182824, "clip_ratio/high_mean": 0.0006147263784441748, "clip_ratio/low_mean": 0.00025568385171936825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008704102292540483, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3598.0, "completions/mean_length": 674.9285888671875, "completions/mean_terminated_length": 539.99072265625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.354518950437318, "grad_norm": 0.15384966135025024, "learning_rate": 1e-06, "loss": -0.0253, "num_tokens": 270088189.0, "reward": 0.7053571939468384, "reward_std": 0.133991077542305, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 466 }, { "clip_ratio/high_max": 0.0015461368420801591, "clip_ratio/high_mean": 0.0005656045050272951, "clip_ratio/low_mean": 0.0004643862666853238, "clip_ratio/low_min": 1.5078408068802673e-05, "clip_ratio/region_mean": 0.0010299907698936295, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 684.8538208007812, "completions/mean_terminated_length": 525.4544067382812, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.363848396501457, "grad_norm": 0.2761067748069763, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 270625442.0, "reward": 0.640625, "reward_std": 0.1557832509279251, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 467 }, { "clip_ratio/high_max": 0.002094090494210832, "clip_ratio/high_mean": 0.000778362604251015, "clip_ratio/low_mean": 0.000514425164055865, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012927877723996062, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3219.0, "completions/mean_length": 730.0469360351562, "completions/mean_terminated_length": 539.521240234375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 4.373177842565598, "grad_norm": 0.5231519937515259, "learning_rate": 1e-06, "loss": -0.0252, "num_tokens": 271168580.0, "reward": 0.668526828289032, "reward_std": 0.1542862057685852, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 468 }, { "clip_ratio/high_max": 0.0017877782665891573, "clip_ratio/high_mean": 0.0006603659430766129, "clip_ratio/low_mean": 0.00032976200827761204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009901279627229087, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 828.5167846679688, "completions/mean_terminated_length": 560.1726684570312, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 4.382507288629737, "grad_norm": 0.22118157148361206, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 271722323.0, "reward": 0.6149553656578064, "reward_std": 0.14507634937763214, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 469 }, { "clip_ratio/high_max": 0.0016418554696429055, "clip_ratio/high_mean": 0.0005991567741148174, "clip_ratio/low_mean": 0.000684322667439119, "clip_ratio/low_min": 1.2042389244015794e-05, "clip_ratio/region_mean": 0.0012834794360969681, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3638.0, "completions/mean_length": 735.4408569335938, "completions/mean_terminated_length": 586.6049194335938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.391836734693878, "grad_norm": 0.17829455435276031, "learning_rate": 1e-06, "loss": -0.0188, "num_tokens": 272321646.0, "reward": 0.5848214626312256, "reward_std": 0.16717232763767242, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 470 }, { "clip_ratio/high_max": 0.0017424862526240759, "clip_ratio/high_mean": 0.0005964314627817657, "clip_ratio/low_mean": 0.00040148980042431504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00099792124274245, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 801.927490234375, "completions/mean_terminated_length": 569.728759765625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 4.401166180758017, "grad_norm": 0.17486721277236938, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 272896141.0, "reward": 0.621651828289032, "reward_std": 0.14643321931362152, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 471 }, { "clip_ratio/high_max": 0.0018719569452514406, "clip_ratio/high_mean": 0.0006134354989626445, "clip_ratio/low_mean": 0.0005102566428831778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011236921382078435, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3691.0, "completions/mean_length": 727.091552734375, "completions/mean_terminated_length": 523.7609252929688, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 4.410495626822158, "grad_norm": 0.1855579912662506, "learning_rate": 1e-06, "loss": -0.0144, "num_tokens": 273430167.0, "reward": 0.6383928656578064, "reward_std": 0.15180349349975586, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 472 }, { "clip_ratio/high_max": 0.0017102072451962158, "clip_ratio/high_mean": 0.0006409972775145434, "clip_ratio/low_mean": 0.0006280849593167659, "clip_ratio/low_min": 2.213956759078428e-05, "clip_ratio/region_mean": 0.0012690822113654576, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 746.2410888671875, "completions/mean_terminated_length": 531.4109497070312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 4.419825072886297, "grad_norm": 0.20268253982067108, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 273969439.0, "reward": 0.6160714626312256, "reward_std": 0.16953809559345245, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 473 }, { "clip_ratio/high_max": 0.0014539443545800168, "clip_ratio/high_mean": 0.0006013629317749292, "clip_ratio/low_mean": 0.00045351814969762927, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001054881104209926, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3914.0, "completions/mean_length": 706.0870971679688, "completions/mean_terminated_length": 576.461181640625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 4.429154518950437, "grad_norm": 0.1866583377122879, "learning_rate": 1e-06, "loss": -0.0182, "num_tokens": 274557893.0, "reward": 0.6238839626312256, "reward_std": 0.14759299159049988, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 474 }, { "clip_ratio/high_max": 0.0011870732232637238, "clip_ratio/high_mean": 0.0004201244241812674, "clip_ratio/low_mean": 0.0006123480252426816, "clip_ratio/low_min": 1.4789398846914992e-05, "clip_ratio/region_mean": 0.0010324724316888023, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3373.0, "completions/mean_length": 837.8471069335938, "completions/mean_terminated_length": 578.7650756835938, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 4.438483965014577, "grad_norm": 0.16509538888931274, "learning_rate": 1e-06, "loss": -0.0111, "num_tokens": 275129516.0, "reward": 0.5636160969734192, "reward_std": 0.1417027711868286, "rewards/verify_math_reward/mean": 0.5636160969734192, "rewards/verify_math_reward/std": 0.49621346592903137, "step": 475 }, { "clip_ratio/high_max": 0.0017929547393578105, "clip_ratio/high_mean": 0.0006511890314868651, "clip_ratio/low_mean": 0.00043041574213020795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010816047542903107, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1803.0, "completions/mean_length": 692.7120971679688, "completions/mean_terminated_length": 521.1512451171875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 4.447813411078717, "grad_norm": 0.23841670155525208, "learning_rate": 1e-06, "loss": -0.0147, "num_tokens": 275663066.0, "reward": 0.6629464626312256, "reward_std": 0.13339374959468842, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 476 }, { "clip_ratio/high_max": 0.0022910384795977734, "clip_ratio/high_mean": 0.0008949081111495616, "clip_ratio/low_mean": 0.0005468743415804056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014417824349948205, "completions/clipped_ratio": 0.046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3545.0, "completions/mean_length": 700.8560791015625, "completions/mean_terminated_length": 533.8817138671875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 4.457142857142857, "grad_norm": 0.21968919038772583, "learning_rate": 1e-06, "loss": -0.0183, "num_tokens": 276204841.0, "reward": 0.6674107313156128, "reward_std": 0.17536300420761108, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 477 }, { "clip_ratio/high_max": 0.001537029049359262, "clip_ratio/high_mean": 0.0005785736011603149, "clip_ratio/low_mean": 0.0005203723349040956, "clip_ratio/low_min": 2.3229882572195493e-05, "clip_ratio/region_mean": 0.0010989459515258204, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3737.0, "completions/mean_length": 754.0535888671875, "completions/mean_terminated_length": 606.0419921875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 4.466472303206997, "grad_norm": 0.19907844066619873, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 276812321.0, "reward": 0.637276828289032, "reward_std": 0.16559115052223206, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 478 }, { "clip_ratio/high_max": 0.0021917228477832396, "clip_ratio/high_mean": 0.0007876925938035129, "clip_ratio/low_mean": 0.0006437472352445184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014314398031274322, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3900.0, "completions/mean_length": 785.2188110351562, "completions/mean_terminated_length": 551.84228515625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 4.475801749271137, "grad_norm": 0.26668596267700195, "learning_rate": 1e-06, "loss": -0.025, "num_tokens": 277364997.0, "reward": 0.6328125, "reward_std": 0.17584973573684692, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 479 }, { "clip_ratio/high_max": 0.0014978326744312653, "clip_ratio/high_mean": 0.000552147514099488, "clip_ratio/low_mean": 0.000527881029938726, "clip_ratio/low_min": 1.577884358994197e-05, "clip_ratio/region_mean": 0.0010800285435834667, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3816.0, "completions/mean_length": 848.4721069335938, "completions/mean_terminated_length": 581.7669067382812, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 4.485131195335277, "grad_norm": 0.1616966277360916, "learning_rate": 1e-06, "loss": -0.0252, "num_tokens": 277940524.0, "reward": 0.6015625, "reward_std": 0.14943841099739075, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 480 }, { "clip_ratio/high_max": 0.0022573780624952633, "clip_ratio/high_mean": 0.0007787540616845945, "clip_ratio/low_mean": 0.0006733808249919093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014521348712150939, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3029.0, "completions/mean_length": 739.8917846679688, "completions/mean_terminated_length": 490.3968811035156, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 4.494460641399417, "grad_norm": 0.20210717618465424, "learning_rate": 1e-06, "loss": -0.0121, "num_tokens": 278436291.0, "reward": 0.6238839626312256, "reward_std": 0.14766854047775269, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 481 }, { "clip_ratio/high_max": 0.0012219393265695544, "clip_ratio/high_mean": 0.00044862418280899874, "clip_ratio/low_mean": 0.0005656630401063012, "clip_ratio/low_min": 1.3922922335041221e-05, "clip_ratio/region_mean": 0.001014287227008026, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 772.2678833007812, "completions/mean_terminated_length": 554.9013061523438, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 4.503790087463557, "grad_norm": 0.17671526968479156, "learning_rate": 1e-06, "loss": -0.0107, "num_tokens": 278995203.0, "reward": 0.6272321939468384, "reward_std": 0.12685278058052063, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 482 }, { "clip_ratio/high_max": 0.0017179354799736757, "clip_ratio/high_mean": 0.0005674014209944289, "clip_ratio/low_mean": 0.00040528669796913164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009726881180540659, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3602.0, "completions/mean_length": 722.7545166015625, "completions/mean_terminated_length": 489.2840270996094, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 4.513119533527696, "grad_norm": 0.16090349853038788, "learning_rate": 1e-06, "loss": -0.012, "num_tokens": 279485391.0, "reward": 0.6674107313156128, "reward_std": 0.1107741966843605, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 483 }, { "clip_ratio/high_max": 0.0015326368047681171, "clip_ratio/high_mean": 0.0005107696488266811, "clip_ratio/low_mean": 0.0004591243396134814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009698939938971307, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4038.0, "completions/mean_length": 817.7767944335938, "completions/mean_terminated_length": 603.3864135742188, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 4.522448979591837, "grad_norm": 0.16423842310905457, "learning_rate": 1e-06, "loss": -0.0391, "num_tokens": 280075135.0, "reward": 0.6183035969734192, "reward_std": 0.13433623313903809, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 484 }, { "clip_ratio/high_max": 0.0011952998420383665, "clip_ratio/high_mean": 0.00033654845924502297, "clip_ratio/low_mean": 0.0004079132208971714, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007444616749125998, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1790.0, "completions/mean_length": 761.6004638671875, "completions/mean_terminated_length": 551.9644165039062, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 4.531778425655976, "grad_norm": 0.14998149871826172, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 280633321.0, "reward": 0.5736607313156128, "reward_std": 0.1043110266327858, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 485 }, { "clip_ratio/high_max": 0.0013147492645657621, "clip_ratio/high_mean": 0.0004685137801061501, "clip_ratio/low_mean": 0.00047560554958181456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009441193396924064, "completions/clipped_ratio": 0.0379464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2635.0, "completions/mean_length": 647.2433471679688, "completions/mean_terminated_length": 511.21343994140625, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 4.541107871720117, "grad_norm": 0.17931948602199554, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 281152339.0, "reward": 0.6428571939468384, "reward_std": 0.1504134237766266, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 486 }, { "clip_ratio/high_max": 0.0018322935429750942, "clip_ratio/high_mean": 0.0007823343203199329, "clip_ratio/low_mean": 0.0005496792377925885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013320135403773747, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 763.3895263671875, "completions/mean_terminated_length": 562.2496948242188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 4.550437317784256, "grad_norm": 0.2100006639957428, "learning_rate": 1e-06, "loss": -0.0256, "num_tokens": 281713448.0, "reward": 0.6495535969734192, "reward_std": 0.1673588901758194, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 487 }, { "clip_ratio/high_max": 0.0017669797198323067, "clip_ratio/high_mean": 0.0006832164363004267, "clip_ratio/low_mean": 0.0004076522900504642, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010908686999755446, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 693.9553833007812, "completions/mean_terminated_length": 530.8163452148438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 4.559766763848397, "grad_norm": 0.21150177717208862, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 282253192.0, "reward": 0.7265625596046448, "reward_std": 0.16081610321998596, "rewards/verify_math_reward/mean": 0.7265625, "rewards/verify_math_reward/std": 0.4459724426269531, "step": 488 }, { "clip_ratio/high_max": 0.001862763430835912, "clip_ratio/high_mean": 0.0006479796838902985, "clip_ratio/low_mean": 0.0004711603437499434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001119140040827915, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3180.0, "completions/mean_length": 785.8516235351562, "completions/mean_terminated_length": 560.9666137695312, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 4.569096209912536, "grad_norm": 0.23946093022823334, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 282817283.0, "reward": 0.6428571939468384, "reward_std": 0.15195181965827942, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 489 }, { "clip_ratio/high_max": 0.0019186276003893, "clip_ratio/high_mean": 0.0006966680484765675, "clip_ratio/low_mean": 0.0003403620912649785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010370301097282209, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2050.0, "completions/mean_length": 811.4029541015625, "completions/mean_terminated_length": 562.9879760742188, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 4.578425655976677, "grad_norm": 0.16684193909168243, "learning_rate": 1e-06, "loss": -0.0567, "num_tokens": 283383804.0, "reward": 0.598214328289032, "reward_std": 0.13831600546836853, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 490 }, { "clip_ratio/high_max": 0.0020056581270182505, "clip_ratio/high_mean": 0.0006818503643444274, "clip_ratio/low_mean": 0.000503500452850858, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011853508403874002, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 697.3326416015625, "completions/mean_terminated_length": 538.516357421875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 4.587755102040816, "grad_norm": 0.2001650631427765, "learning_rate": 1e-06, "loss": -0.0028, "num_tokens": 283930614.0, "reward": 0.6830357313156128, "reward_std": 0.1457175612449646, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 491 }, { "clip_ratio/high_max": 0.0019435344038356561, "clip_ratio/high_mean": 0.0007646160665899515, "clip_ratio/low_mean": 0.0006633842331211781, "clip_ratio/low_min": 1.9512955987011082e-05, "clip_ratio/region_mean": 0.0014280002433224581, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3367.0, "completions/mean_length": 769.6439819335938, "completions/mean_terminated_length": 556.3147583007812, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 4.597084548104956, "grad_norm": 0.18549324572086334, "learning_rate": 1e-06, "loss": -0.0279, "num_tokens": 284487335.0, "reward": 0.637276828289032, "reward_std": 0.18013371527194977, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 492 }, { "clip_ratio/high_max": 0.0016129888463183306, "clip_ratio/high_mean": 0.0006472732002293924, "clip_ratio/low_mean": 0.00040173178149416344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010490049608051777, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3518.0, "completions/mean_length": 748.7064819335938, "completions/mean_terminated_length": 529.7990112304688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 4.606413994169096, "grad_norm": 0.18927405774593353, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 285018688.0, "reward": 0.5926339626312256, "reward_std": 0.13211314380168915, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 493 }, { "clip_ratio/high_max": 0.002222860977781238, "clip_ratio/high_mean": 0.0008036092076508794, "clip_ratio/low_mean": 0.0005098076007925556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013134168002579827, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3682.0, "completions/mean_length": 709.0982666015625, "completions/mean_terminated_length": 567.3209228515625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 4.615743440233236, "grad_norm": 0.2239789068698883, "learning_rate": 1e-06, "loss": -0.0127, "num_tokens": 285585968.0, "reward": 0.6718750596046448, "reward_std": 0.16953669488430023, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 494 }, { "clip_ratio/high_max": 0.001853333531471435, "clip_ratio/high_mean": 0.000694443686370505, "clip_ratio/low_mean": 0.0005493978615049855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012438415360520594, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2817.0, "completions/mean_length": 715.4219360351562, "completions/mean_terminated_length": 519.8511962890625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 4.625072886297376, "grad_norm": 0.26524800062179565, "learning_rate": 1e-06, "loss": -0.0184, "num_tokens": 286111914.0, "reward": 0.6830357313156128, "reward_std": 0.17765070497989655, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 495 }, { "clip_ratio/high_max": 0.001967742820852436, "clip_ratio/high_mean": 0.000702927791280672, "clip_ratio/low_mean": 0.000588283081924601, "clip_ratio/low_min": 3.1709791073808447e-05, "clip_ratio/region_mean": 0.0012912108650198206, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 1882.0, "completions/mean_length": 764.1172485351562, "completions/mean_terminated_length": 550.4335327148438, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 4.634402332361516, "grad_norm": 0.22960741817951202, "learning_rate": 1e-06, "loss": -0.0181, "num_tokens": 286667139.0, "reward": 0.5948660969734192, "reward_std": 0.17818482220172882, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 496 }, { "clip_ratio/high_max": 0.00152768318366725, "clip_ratio/high_mean": 0.000530095907379291, "clip_ratio/low_mean": 0.0005729503295697214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011030462264898233, "completions/clipped_ratio": 0.0234375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 602.1283569335938, "completions/mean_terminated_length": 518.2754516601562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 4.643731778425656, "grad_norm": 5.484728813171387, "learning_rate": 1e-06, "loss": -0.0071, "num_tokens": 287203462.0, "reward": 0.6651785969734192, "reward_std": 0.1493610441684723, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 497 }, { "clip_ratio/high_max": 0.0015428415281348862, "clip_ratio/high_mean": 0.0006701672446070006, "clip_ratio/low_mean": 0.0006359260405588429, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001306093286984833, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3899.0, "completions/mean_length": 799.4766235351562, "completions/mean_terminated_length": 571.3162231445312, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 4.653061224489796, "grad_norm": 0.21233105659484863, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 287771761.0, "reward": 0.6417410969734192, "reward_std": 0.17066533863544464, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975653409957886, "step": 498 }, { "clip_ratio/high_max": 0.0018094013903464656, "clip_ratio/high_mean": 0.000651849810310523, "clip_ratio/low_mean": 0.0004923005108139478, "clip_ratio/low_min": 1.493072159064468e-05, "clip_ratio/region_mean": 0.001144150308391545, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3224.0, "completions/mean_length": 759.8828735351562, "completions/mean_terminated_length": 545.9275512695312, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 4.662390670553936, "grad_norm": 0.361331045627594, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 288322464.0, "reward": 0.6774553656578064, "reward_std": 0.17498140037059784, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 499 }, { "clip_ratio/high_max": 0.0023586605821037665, "clip_ratio/high_mean": 0.0008542319155822042, "clip_ratio/low_mean": 0.00040829024374033906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012625221606867854, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 736.6105346679688, "completions/mean_terminated_length": 542.265625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 4.671720116618076, "grad_norm": 0.197487935423851, "learning_rate": 1e-06, "loss": -0.0349, "num_tokens": 288872907.0, "reward": 0.6183035969734192, "reward_std": 0.14342734217643738, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 500 }, { "clip_ratio/high_max": 0.002165036043152213, "clip_ratio/high_mean": 0.0008253226878878195, "clip_ratio/low_mean": 0.00044876248966829735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012740851434500655, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3568.0, "completions/mean_length": 743.1082763671875, "completions/mean_terminated_length": 561.6576538085938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 4.681049562682215, "grad_norm": 0.19396717846393585, "learning_rate": 1e-06, "loss": -0.0223, "num_tokens": 289445516.0, "reward": 0.6082589626312256, "reward_std": 0.1599937528371811, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 501 }, { "clip_ratio/high_max": 0.0016229326211032458, "clip_ratio/high_mean": 0.0006127836386440322, "clip_ratio/low_mean": 0.0005866753099326161, "clip_ratio/low_min": 5.1067519962089136e-05, "clip_ratio/region_mean": 0.0011994589513051324, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 760.575927734375, "completions/mean_terminated_length": 559.2662963867188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 4.690379008746356, "grad_norm": 0.23123548924922943, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 290016672.0, "reward": 0.5680803656578064, "reward_std": 0.1706332564353943, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 502 }, { "clip_ratio/high_max": 0.001514357709311298, "clip_ratio/high_mean": 0.0006434163278754568, "clip_ratio/low_mean": 0.0005111019117975957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011545181987457909, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 859.4185791015625, "completions/mean_terminated_length": 559.4426879882812, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 4.699708454810495, "grad_norm": 0.21300344169139862, "learning_rate": 1e-06, "loss": -0.0349, "num_tokens": 290569695.0, "reward": 0.590401828289032, "reward_std": 0.16848641633987427, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 503 }, { "clip_ratio/high_max": 0.0020721697710541775, "clip_ratio/high_mean": 0.000666506194647809, "clip_ratio/low_mean": 0.0004435453745372797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011100515730504412, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 820.2589721679688, "completions/mean_terminated_length": 576.7386474609375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 4.709037900874636, "grad_norm": 0.21655043959617615, "learning_rate": 1e-06, "loss": -0.0233, "num_tokens": 291142287.0, "reward": 0.5915178656578064, "reward_std": 0.15326520800590515, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 504 }, { "clip_ratio/high_max": 0.001905234094010666, "clip_ratio/high_mean": 0.0006369381944750785, "clip_ratio/low_mean": 0.0003795098900809535, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010164481063839048, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 817.1864013671875, "completions/mean_terminated_length": 547.9118041992188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 4.718367346938775, "grad_norm": 0.18194395303726196, "learning_rate": 1e-06, "loss": -0.0356, "num_tokens": 291690726.0, "reward": 0.5859375, "reward_std": 0.14248163998126984, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 505 }, { "clip_ratio/high_max": 0.001590982086781878, "clip_ratio/high_mean": 0.0005425151657618699, "clip_ratio/low_mean": 0.00047243583435374603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001014950994431274, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3204.0, "completions/mean_length": 796.4241333007812, "completions/mean_terminated_length": 568.0525512695312, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 4.727696793002916, "grad_norm": 0.1851402372121811, "learning_rate": 1e-06, "loss": -0.0247, "num_tokens": 292256266.0, "reward": 0.5680803656578064, "reward_std": 0.14628097414970398, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 506 }, { "clip_ratio/high_max": 0.001641770315472968, "clip_ratio/high_mean": 0.0006798308804718545, "clip_ratio/low_mean": 0.0006032846877133125, "clip_ratio/low_min": 1.2972187505511101e-05, "clip_ratio/region_mean": 0.001283115561818704, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2595.0, "completions/mean_length": 667.0178833007812, "completions/mean_terminated_length": 506.7850341796875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.737026239067055, "grad_norm": 0.2165297269821167, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 292788186.0, "reward": 0.65625, "reward_std": 0.16393959522247314, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 507 }, { "clip_ratio/high_max": 0.0019648484958452173, "clip_ratio/high_mean": 0.0007177483748819213, "clip_ratio/low_mean": 0.0004973882305421284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012151366099715233, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3965.0, "completions/mean_length": 858.8471069335938, "completions/mean_terminated_length": 580.2557373046875, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 4.746355685131196, "grad_norm": 0.20138254761695862, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 293361609.0, "reward": 0.5647321939468384, "reward_std": 0.16607898473739624, "rewards/verify_math_reward/mean": 0.5647321343421936, "rewards/verify_math_reward/std": 0.49606895446777344, "step": 508 }, { "clip_ratio/high_max": 0.0017334266631223727, "clip_ratio/high_mean": 0.0006477061779150972, "clip_ratio/low_mean": 0.0006191994889377384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012669056486629415, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 744.8158569335938, "completions/mean_terminated_length": 538.3447875976562, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 4.755685131195335, "grad_norm": 0.23863951861858368, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 293909596.0, "reward": 0.625, "reward_std": 0.17495255172252655, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 509 }, { "clip_ratio/high_max": 0.002056710440228926, "clip_ratio/high_mean": 0.0007373519492830383, "clip_ratio/low_mean": 0.0004097512669432035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011471032375993673, "completions/clipped_ratio": 0.060267857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 779.4006958007812, "completions/mean_terminated_length": 566.6972045898438, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 4.765014577259475, "grad_norm": 0.20938974618911743, "learning_rate": 1e-06, "loss": -0.0249, "num_tokens": 294471355.0, "reward": 0.6986607313156128, "reward_std": 0.17659832537174225, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 510 }, { "clip_ratio/high_max": 0.0019161519703629892, "clip_ratio/high_mean": 0.0007846968983358238, "clip_ratio/low_mean": 0.0004255761336935393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012102730470360257, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3427.0, "completions/mean_length": 827.9163208007812, "completions/mean_terminated_length": 597.549560546875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 4.774344023323615, "grad_norm": 0.20174439251422882, "learning_rate": 1e-06, "loss": -0.0296, "num_tokens": 295074488.0, "reward": 0.609375, "reward_std": 0.18141572177410126, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 511 }, { "clip_ratio/high_max": 0.001628550222449121, "clip_ratio/high_mean": 0.0005664115392391977, "clip_ratio/low_mean": 0.0005180246389500098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010844361822819337, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 661.0703125, "completions/mean_terminated_length": 500.5595703125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 4.783673469387755, "grad_norm": 0.188065305352211, "learning_rate": 1e-06, "loss": -0.0082, "num_tokens": 295588015.0, "reward": 0.6473214626312256, "reward_std": 0.12790516018867493, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 512 }, { "clip_ratio/high_max": 0.0017988044128287584, "clip_ratio/high_mean": 0.0007507897207688075, "clip_ratio/low_mean": 0.0004354124384917668, "clip_ratio/low_min": 1.3640331417263951e-05, "clip_ratio/region_mean": 0.001186202160170069, "completions/clipped_ratio": 0.044642857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 2530.0, "completions/mean_length": 697.2567138671875, "completions/mean_terminated_length": 538.4368896484375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 4.793002915451895, "grad_norm": 1.1061233282089233, "learning_rate": 1e-06, "loss": -0.0197, "num_tokens": 296141229.0, "reward": 0.5970982313156128, "reward_std": 0.15980830788612366, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 513 }, { "clip_ratio/high_max": 0.0017655004703556187, "clip_ratio/high_mean": 0.0006406353877537185, "clip_ratio/low_mean": 0.0004561484574878705, "clip_ratio/low_min": 2.1856967578059994e-05, "clip_ratio/region_mean": 0.0010967838352371473, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 787.3995971679688, "completions/mean_terminated_length": 571.0225830078125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 4.802332361516035, "grad_norm": 0.23085437715053558, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 296702347.0, "reward": 0.6350446939468384, "reward_std": 0.18193678557872772, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 514 }, { "clip_ratio/high_max": 0.0018188555113738403, "clip_ratio/high_mean": 0.0006787879956391407, "clip_ratio/low_mean": 0.00048321402209694497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011620020341069903, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3307.0, "completions/mean_length": 841.5502319335938, "completions/mean_terminated_length": 552.8809204101562, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 4.811661807580175, "grad_norm": 0.28857406973838806, "learning_rate": 1e-06, "loss": -0.0143, "num_tokens": 297254144.0, "reward": 0.606026828289032, "reward_std": 0.15826597809791565, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 515 }, { "clip_ratio/high_max": 0.002027556700340938, "clip_ratio/high_mean": 0.0008020605837373296, "clip_ratio/low_mean": 0.0007501269938074984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015521875757258385, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3265.0, "completions/mean_length": 826.0513916015625, "completions/mean_terminated_length": 566.0313110351562, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 4.820991253644315, "grad_norm": 0.21496886014938354, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 297818046.0, "reward": 0.578125, "reward_std": 0.19531114399433136, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 516 }, { "clip_ratio/high_max": 0.0018064766736642923, "clip_ratio/high_mean": 0.000677952555633965, "clip_ratio/low_mean": 0.0006463891731982585, "clip_ratio/low_min": 5.396787673817016e-05, "clip_ratio/region_mean": 0.001324341727013234, "completions/clipped_ratio": 0.0401785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3644.0, "completions/mean_length": 719.2422485351562, "completions/mean_terminated_length": 577.8895263671875, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 4.830320699708455, "grad_norm": 0.21425247192382812, "learning_rate": 1e-06, "loss": -0.0029, "num_tokens": 298403903.0, "reward": 0.6116071939468384, "reward_std": 0.18024571239948273, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 517 }, { "clip_ratio/high_max": 0.0017812663081713254, "clip_ratio/high_mean": 0.000571148573044411, "clip_ratio/low_mean": 0.00040367850806433125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009748270931595471, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3386.0, "completions/mean_length": 837.9576416015625, "completions/mean_terminated_length": 544.6544799804688, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 4.839650145772595, "grad_norm": 0.31801754236221313, "learning_rate": 1e-06, "loss": -0.0146, "num_tokens": 298933921.0, "reward": 0.6316964626312256, "reward_std": 0.12629005312919617, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 518 }, { "clip_ratio/high_max": 0.0019636725628515705, "clip_ratio/high_mean": 0.0006848009579698555, "clip_ratio/low_mean": 0.00046225982123360154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011470608005765826, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 841.7678833007812, "completions/mean_terminated_length": 565.9854736328125, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 4.848979591836734, "grad_norm": 0.19902126491069794, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 299484529.0, "reward": 0.5948660969734192, "reward_std": 0.1479395478963852, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 519 }, { "clip_ratio/high_max": 0.0015951618261169642, "clip_ratio/high_mean": 0.0005832678198203212, "clip_ratio/low_mean": 0.0004987134489056189, "clip_ratio/low_min": 2.4127414690156e-05, "clip_ratio/region_mean": 0.0010819812559930142, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3452.0, "completions/mean_length": 831.3973388671875, "completions/mean_terminated_length": 588.705078125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 4.858309037900875, "grad_norm": 0.17633138597011566, "learning_rate": 1e-06, "loss": -0.0219, "num_tokens": 300063341.0, "reward": 0.5569196939468384, "reward_std": 0.15060842037200928, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 520 }, { "clip_ratio/high_max": 0.0016294401102641132, "clip_ratio/high_mean": 0.000635173610135098, "clip_ratio/low_mean": 0.0005369109121602378, "clip_ratio/low_min": 5.691649221262196e-06, "clip_ratio/region_mean": 0.001172084534118767, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4018.0, "completions/mean_length": 778.2355346679688, "completions/mean_terminated_length": 582.150146484375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 4.867638483965014, "grad_norm": 0.19363398849964142, "learning_rate": 1e-06, "loss": -0.0226, "num_tokens": 300648728.0, "reward": 0.6305803656578064, "reward_std": 0.1528654545545578, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.48291724920272827, "step": 521 }, { "clip_ratio/high_max": 0.0020411835721461102, "clip_ratio/high_mean": 0.0007113227475201711, "clip_ratio/low_mean": 0.0004420190798555268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011533418255567085, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2730.0, "completions/mean_length": 685.357177734375, "completions/mean_terminated_length": 513.425537109375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 4.876967930029155, "grad_norm": 0.212229385972023, "learning_rate": 1e-06, "loss": -0.0367, "num_tokens": 301171648.0, "reward": 0.6897321939468384, "reward_std": 0.15492630004882812, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 522 }, { "clip_ratio/high_max": 0.0016648603341309354, "clip_ratio/high_mean": 0.0007063466109684668, "clip_ratio/low_mean": 0.0005030818419982097, "clip_ratio/low_min": 2.4855627088982146e-05, "clip_ratio/region_mean": 0.0012094284647901077, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3781.0, "completions/mean_length": 758.6417846679688, "completions/mean_terminated_length": 565.5714111328125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 4.886297376093294, "grad_norm": 0.1821240335702896, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 301747087.0, "reward": 0.582589328289032, "reward_std": 0.17746664583683014, "rewards/verify_math_reward/mean": 0.5825892686843872, "rewards/verify_math_reward/std": 0.493407279253006, "step": 523 }, { "clip_ratio/high_max": 0.001530692014057422, "clip_ratio/high_mean": 0.0005710766499760211, "clip_ratio/low_mean": 0.00046945452277213917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001040531156832003, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3729.0, "completions/mean_length": 776.4832763671875, "completions/mean_terminated_length": 521.1358642578125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 4.895626822157435, "grad_norm": 0.1970164030790329, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 302271976.0, "reward": 0.6462053656578064, "reward_std": 0.12985865771770477, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 524 }, { "clip_ratio/high_max": 0.0016025000186346006, "clip_ratio/high_mean": 0.000574866196984658, "clip_ratio/low_mean": 0.0005449543987197103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011198205829714425, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1774.0, "completions/mean_length": 858.8549194335938, "completions/mean_terminated_length": 597.2279663085938, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 4.904956268221574, "grad_norm": 0.1987365186214447, "learning_rate": 1e-06, "loss": -0.0176, "num_tokens": 302863254.0, "reward": 0.535714328289032, "reward_std": 0.15793287754058838, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 525 }, { "clip_ratio/high_max": 0.0018846390703401994, "clip_ratio/high_mean": 0.0007924009560156264, "clip_ratio/low_mean": 0.0005120176338095916, "clip_ratio/low_min": 1.2415574019541964e-05, "clip_ratio/region_mean": 0.0013044185761827976, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 919.724365234375, "completions/mean_terminated_length": 560.6670532226562, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 4.914285714285715, "grad_norm": 0.2049759328365326, "learning_rate": 1e-06, "loss": -0.0363, "num_tokens": 303406407.0, "reward": 0.6205357313156128, "reward_std": 0.18069963157176971, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 526 }, { "clip_ratio/high_max": 0.0016742554980737623, "clip_ratio/high_mean": 0.0005448062183859292, "clip_ratio/low_mean": 0.00034900177320196235, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008938079972722335, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 818.396240234375, "completions/mean_terminated_length": 549.2210083007812, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.923615160349854, "grad_norm": 0.1686515510082245, "learning_rate": 1e-06, "loss": -0.0217, "num_tokens": 303950626.0, "reward": 0.5915178656578064, "reward_std": 0.11114510893821716, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 527 }, { "clip_ratio/high_max": 0.0016543881138204597, "clip_ratio/high_mean": 0.000614532134932233, "clip_ratio/low_mean": 0.0004907750080747064, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011053071539208759, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 819.8125610351562, "completions/mean_terminated_length": 588.87451171875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 4.932944606413994, "grad_norm": 0.18196366727352142, "learning_rate": 1e-06, "loss": -0.0343, "num_tokens": 304532546.0, "reward": 0.621651828289032, "reward_std": 0.1621701568365097, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 528 }, { "clip_ratio/high_max": 0.001804248575353995, "clip_ratio/high_mean": 0.0006423912036552792, "clip_ratio/low_mean": 0.0003004134805451031, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000942804694204824, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3302.0, "completions/mean_length": 770.8348388671875, "completions/mean_terminated_length": 532.1865844726562, "completions/min_length": 186.0, "completions/min_terminated_length": 186.0, "epoch": 4.942274052478134, "grad_norm": 0.19462347030639648, "learning_rate": 1e-06, "loss": -0.0328, "num_tokens": 305057566.0, "reward": 0.645089328289032, "reward_std": 0.14196309447288513, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 529 }, { "clip_ratio/high_max": 0.0017251281169592403, "clip_ratio/high_mean": 0.0006889636570122093, "clip_ratio/low_mean": 0.0004455980179045582, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001134561694925651, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4061.0, "completions/mean_length": 817.2756958007812, "completions/mean_terminated_length": 573.5335693359375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 4.9516034985422746, "grad_norm": 0.2360532432794571, "learning_rate": 1e-06, "loss": -0.0155, "num_tokens": 305625701.0, "reward": 0.6506696939468384, "reward_std": 0.15022864937782288, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 530 }, { "clip_ratio/high_max": 0.001609930634003831, "clip_ratio/high_mean": 0.0005716356172342785, "clip_ratio/low_mean": 0.00044805481638832134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010196904331678525, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3932.0, "completions/mean_length": 756.1261596679688, "completions/mean_terminated_length": 516.4222412109375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 4.960932944606414, "grad_norm": 0.2021724432706833, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 306141678.0, "reward": 0.7008928656578064, "reward_std": 0.13200366497039795, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 531 }, { "clip_ratio/high_max": 0.001937603625265183, "clip_ratio/high_mean": 0.0006960711689316668, "clip_ratio/low_mean": 0.0006089017770136707, "clip_ratio/low_min": 3.501400715322234e-05, "clip_ratio/region_mean": 0.0013049729568592738, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1958.0, "completions/mean_length": 776.3616333007812, "completions/mean_terminated_length": 546.6014404296875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 4.970262390670554, "grad_norm": 0.23575113713741302, "learning_rate": 1e-06, "loss": -0.0192, "num_tokens": 306679418.0, "reward": 0.6462053656578064, "reward_std": 0.16972285509109497, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 532 }, { "clip_ratio/high_max": 0.0018032093175861519, "clip_ratio/high_mean": 0.000643911968836619, "clip_ratio/low_mean": 0.0004977471126039745, "clip_ratio/low_min": 3.773394928430207e-05, "clip_ratio/region_mean": 0.001141659085988067, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3844.0, "completions/mean_length": 793.4542846679688, "completions/mean_terminated_length": 556.4293823242188, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 4.979591836734694, "grad_norm": 0.19462807476520538, "learning_rate": 1e-06, "loss": -0.038, "num_tokens": 307237297.0, "reward": 0.6071428656578064, "reward_std": 0.14057300984859467, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865824937820435, "step": 533 }, { "clip_ratio/high_max": 0.0015770237005199306, "clip_ratio/high_mean": 0.0004682087228502496, "clip_ratio/low_mean": 0.0003231561415759643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007913648523754091, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2732.0, "completions/mean_length": 798.2500610351562, "completions/mean_terminated_length": 557.3365478515625, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 4.988921282798834, "grad_norm": 2.719938278198242, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 307794881.0, "reward": 0.5948660969734192, "reward_std": 0.12729713320732117, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 534 }, { "clip_ratio/high_max": 0.0019295079473522492, "clip_ratio/high_mean": 0.0006116930326243164, "clip_ratio/low_mean": 0.00046655322330479976, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010782462632050738, "completions/clipped_ratio": 0.06534090909090906, "completions/max_length": 4096.0, "completions/max_terminated_length": 1694.0, "completions/mean_length": 743.0823974609375, "completions/mean_terminated_length": 508.68389892578125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 4.998250728862974, "grad_norm": 0.17135488986968994, "learning_rate": 1e-06, "loss": -0.0213, "num_tokens": 308343258.0, "reward": 0.6328125, "reward_std": 0.12798002362251282, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 535 }, { "clip_ratio/high_max": 0.001734473873511888, "clip_ratio/high_mean": 0.0007146760399336927, "clip_ratio/low_mean": 0.0005809196209156653, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012955956481164321, "completions/clipped_ratio": 0.049107142857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3428.0, "completions/mean_length": 738.5892944335938, "completions/mean_terminated_length": 565.201904296875, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 5.0093294460641395, "grad_norm": 0.21436621248722076, "learning_rate": 1e-06, "loss": -0.0377, "num_tokens": 308902402.0, "reward": 0.6696428656578064, "reward_std": 0.1736377328634262, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 536 }, { "clip_ratio/high_max": 0.0015680753713240847, "clip_ratio/high_mean": 0.0006392968753061723, "clip_ratio/low_mean": 0.0004740121639770223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011133090265502688, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4003.0, "completions/mean_length": 805.2689819335938, "completions/mean_terminated_length": 590.0606079101562, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 5.01865889212828, "grad_norm": 0.19323799014091492, "learning_rate": 1e-06, "loss": -0.0244, "num_tokens": 309480363.0, "reward": 0.5837053656578064, "reward_std": 0.16675862669944763, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 537 }, { "clip_ratio/high_max": 0.0018389608048892114, "clip_ratio/high_mean": 0.0006140505993244005, "clip_ratio/low_mean": 0.0004651449494303961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010791955355671234, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3725.0, "completions/mean_length": 924.3047485351562, "completions/mean_terminated_length": 596.1982421875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 5.0279883381924195, "grad_norm": 0.16805453598499298, "learning_rate": 1e-06, "loss": -0.0374, "num_tokens": 310054836.0, "reward": 0.5970982313156128, "reward_std": 0.15446916222572327, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 538 }, { "clip_ratio/high_max": 0.0019854991260217503, "clip_ratio/high_mean": 0.0005954767184448428, "clip_ratio/low_mean": 0.00045045749357086606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001045934248395497, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3103.0, "completions/mean_length": 811.9486694335938, "completions/mean_terminated_length": 524.9927368164062, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 5.03731778425656, "grad_norm": 0.1802738904953003, "learning_rate": 1e-06, "loss": -0.0488, "num_tokens": 310580038.0, "reward": 0.6395089626312256, "reward_std": 0.12181992828845978, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 539 }, { "clip_ratio/high_max": 0.0016207788430619985, "clip_ratio/high_mean": 0.0006753572306479327, "clip_ratio/low_mean": 0.00038364015745173674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010589973753667437, "completions/clipped_ratio": 0.036830357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3873.0, "completions/mean_length": 692.3460083007812, "completions/mean_terminated_length": 562.1946411132812, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 5.0466472303206995, "grad_norm": 0.25847533345222473, "learning_rate": 1e-06, "loss": -0.0239, "num_tokens": 311148324.0, "reward": 0.6964285969734192, "reward_std": 0.15207336843013763, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565731525421, "step": 540 }, { "clip_ratio/high_max": 0.0020743900931847747, "clip_ratio/high_mean": 0.0007518874990637414, "clip_ratio/low_mean": 0.0005199604274821468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012718479047180153, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3514.0, "completions/mean_length": 792.4877319335938, "completions/mean_terminated_length": 521.1847534179688, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 5.05597667638484, "grad_norm": 0.24380233883857727, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 311662793.0, "reward": 0.6651785969734192, "reward_std": 0.14158332347869873, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 541 }, { "clip_ratio/high_max": 0.001640480197238503, "clip_ratio/high_mean": 0.0006248580903047696, "clip_ratio/low_mean": 0.0006177023233249201, "clip_ratio/low_min": 1.4688601368106902e-05, "clip_ratio/region_mean": 0.0012425604181771632, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3033.0, "completions/mean_length": 766.1004638671875, "completions/mean_terminated_length": 544.107177734375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 5.0653061224489795, "grad_norm": 0.2195175290107727, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 312202379.0, "reward": 0.6183035969734192, "reward_std": 0.16848823428153992, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 542 }, { "clip_ratio/high_max": 0.0018057929955830332, "clip_ratio/high_mean": 0.0005769770186816459, "clip_ratio/low_mean": 0.0005559519349844777, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011329289663990494, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1964.0, "completions/mean_length": 703.802490234375, "completions/mean_terminated_length": 524.4265747070312, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 5.07463556851312, "grad_norm": 0.19934502243995667, "learning_rate": 1e-06, "loss": -0.0139, "num_tokens": 312736114.0, "reward": 0.6473214626312256, "reward_std": 0.14969734847545624, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 543 }, { "clip_ratio/high_max": 0.0016377591200580355, "clip_ratio/high_mean": 0.000579360916162841, "clip_ratio/low_mean": 0.000343085593158321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009224465247825719, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 803.0949096679688, "completions/mean_terminated_length": 545.527099609375, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 5.0839650145772595, "grad_norm": 0.1511351764202118, "learning_rate": 1e-06, "loss": -0.0122, "num_tokens": 313282951.0, "reward": 0.6328125, "reward_std": 0.11644896864891052, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 544 }, { "clip_ratio/high_max": 0.001759360806318, "clip_ratio/high_mean": 0.0006546797612827504, "clip_ratio/low_mean": 0.0005265207164484309, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011812004704552237, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3742.0, "completions/mean_length": 816.489990234375, "completions/mean_terminated_length": 585.3178100585938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 5.093294460641399, "grad_norm": 0.33368080854415894, "learning_rate": 1e-06, "loss": -0.0391, "num_tokens": 313872118.0, "reward": 0.5290178656578064, "reward_std": 0.17344370484352112, "rewards/verify_math_reward/mean": 0.5290178656578064, "rewards/verify_math_reward/std": 0.49943602085113525, "step": 545 }, { "clip_ratio/high_max": 0.0018291265150764957, "clip_ratio/high_mean": 0.0007576696643809555, "clip_ratio/low_mean": 0.00040248151799460175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001160151183285052, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 733.5904541015625, "completions/mean_terminated_length": 534.866455078125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.1026239067055394, "grad_norm": 0.1947004348039627, "learning_rate": 1e-06, "loss": -0.0241, "num_tokens": 314407583.0, "reward": 0.652901828289032, "reward_std": 0.15649862587451935, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 546 }, { "clip_ratio/high_max": 0.0015986562939360738, "clip_ratio/high_mean": 0.0005538484174394398, "clip_ratio/low_mean": 0.0005457822521748312, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010996306664310396, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 740.5089721679688, "completions/mean_terminated_length": 554.75146484375, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 5.111953352769679, "grad_norm": 0.1526365578174591, "learning_rate": 1e-06, "loss": -0.0193, "num_tokens": 314968583.0, "reward": 0.6785714626312256, "reward_std": 0.12433726340532303, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 547 }, { "clip_ratio/high_max": 0.0017323404536000453, "clip_ratio/high_mean": 0.0007162682832131395, "clip_ratio/low_mean": 0.00037293118566594785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010891994679695927, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3698.0, "completions/mean_length": 717.3717041015625, "completions/mean_terminated_length": 538.7132568359375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 5.121282798833819, "grad_norm": 0.21684935688972473, "learning_rate": 1e-06, "loss": -0.0189, "num_tokens": 315508996.0, "reward": 0.723214328289032, "reward_std": 0.1310625821352005, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 548 }, { "clip_ratio/high_max": 0.0018178919526690152, "clip_ratio/high_mean": 0.0006247809515116387, "clip_ratio/low_mean": 0.0003424692895350745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009672502528701443, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3700.0, "completions/mean_length": 788.0480346679688, "completions/mean_terminated_length": 592.5425415039062, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 5.130612244897959, "grad_norm": 0.1766895204782486, "learning_rate": 1e-06, "loss": -0.0381, "num_tokens": 316101279.0, "reward": 0.6674107313156128, "reward_std": 0.1288476437330246, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 549 }, { "clip_ratio/high_max": 0.0015654471280868165, "clip_ratio/high_mean": 0.0005066074886599381, "clip_ratio/low_mean": 0.0005465031463245396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010531106272537727, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 768.8225708007812, "completions/mean_terminated_length": 542.7807006835938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 5.139941690962099, "grad_norm": 0.17811238765716553, "learning_rate": 1e-06, "loss": -0.0113, "num_tokens": 316655928.0, "reward": 0.5446428656578064, "reward_std": 0.14722415804862976, "rewards/verify_math_reward/mean": 0.5446428656578064, "rewards/verify_math_reward/std": 0.49828118085861206, "step": 550 }, { "clip_ratio/high_max": 0.001819049855839694, "clip_ratio/high_mean": 0.0005607957336906111, "clip_ratio/low_mean": 0.00045058580462864484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010113815314980457, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3800.0, "completions/mean_length": 792.5569458007812, "completions/mean_terminated_length": 529.87353515625, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 5.149271137026239, "grad_norm": 0.19947810471057892, "learning_rate": 1e-06, "loss": -0.0464, "num_tokens": 317184059.0, "reward": 0.6640625, "reward_std": 0.12982404232025146, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 551 }, { "clip_ratio/high_max": 0.0016915455998969264, "clip_ratio/high_mean": 0.0007980726641108049, "clip_ratio/low_mean": 0.00047955109675967833, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012776237817888614, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2457.0, "completions/mean_length": 781.3158569335938, "completions/mean_terminated_length": 589.5572509765625, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 5.158600583090379, "grad_norm": 0.20176364481449127, "learning_rate": 1e-06, "loss": -0.0401, "num_tokens": 317770694.0, "reward": 0.6339285969734192, "reward_std": 0.18644897639751434, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 552 }, { "clip_ratio/high_max": 0.001715225946099963, "clip_ratio/high_mean": 0.0005189865278225625, "clip_ratio/low_mean": 0.0003097739145232481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008287604505312629, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3892.0, "completions/mean_length": 768.2511596679688, "completions/mean_terminated_length": 546.4011840820312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 5.167930029154519, "grad_norm": 0.15910810232162476, "learning_rate": 1e-06, "loss": -0.0162, "num_tokens": 318315895.0, "reward": 0.6484375, "reward_std": 0.1289571076631546, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 553 }, { "clip_ratio/high_max": 0.001746124053170206, "clip_ratio/high_mean": 0.0006517072833958082, "clip_ratio/low_mean": 0.0005152529715815035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011669602390611544, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3371.0, "completions/mean_length": 858.1998291015625, "completions/mean_terminated_length": 596.5198974609375, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 5.1772594752186585, "grad_norm": 0.2465633898973465, "learning_rate": 1e-06, "loss": -0.0281, "num_tokens": 318897826.0, "reward": 0.6004464626312256, "reward_std": 0.14429426193237305, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 554 }, { "clip_ratio/high_max": 0.0017360788224323187, "clip_ratio/high_mean": 0.0006247759920370299, "clip_ratio/low_mean": 0.0004566940942822839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010814701090566814, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2604.0, "completions/mean_length": 814.8638916015625, "completions/mean_terminated_length": 566.710693359375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 5.186588921282799, "grad_norm": 0.21385346353054047, "learning_rate": 1e-06, "loss": -0.0092, "num_tokens": 319455800.0, "reward": 0.660714328289032, "reward_std": 0.14458850026130676, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 555 }, { "clip_ratio/high_max": 0.0016607689794909675, "clip_ratio/high_mean": 0.000572077973629348, "clip_ratio/low_mean": 0.0003538458436196379, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000925923821341712, "completions/clipped_ratio": 0.0424107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4039.0, "completions/mean_length": 697.9855346679688, "completions/mean_terminated_length": 547.4906616210938, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 5.1959183673469385, "grad_norm": 0.18615104258060455, "learning_rate": 1e-06, "loss": -0.015, "num_tokens": 320016659.0, "reward": 0.6930803656578064, "reward_std": 0.11419306695461273, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 556 }, { "clip_ratio/high_max": 0.0016630271456961054, "clip_ratio/high_mean": 0.0005218762125878129, "clip_ratio/low_mean": 0.00037493973923119484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008968159654614283, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4008.0, "completions/mean_length": 883.4297485351562, "completions/mean_terminated_length": 611.177978515625, "completions/min_length": 198.0, "completions/min_terminated_length": 198.0, "epoch": 5.205247813411079, "grad_norm": 0.1741432100534439, "learning_rate": 1e-06, "loss": -0.0216, "num_tokens": 320610460.0, "reward": 0.6227678656578064, "reward_std": 0.14098487794399261, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 557 }, { "clip_ratio/high_max": 0.0018830596418411005, "clip_ratio/high_mean": 0.0006792893746023765, "clip_ratio/low_mean": 0.00043912097498832736, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011184103314008098, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2745.0, "completions/mean_length": 818.3594360351562, "completions/mean_terminated_length": 536.2836303710938, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 5.214577259475218, "grad_norm": 0.18685314059257507, "learning_rate": 1e-06, "loss": -0.0417, "num_tokens": 321148422.0, "reward": 0.6462053656578064, "reward_std": 0.1511276513338089, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 558 }, { "clip_ratio/high_max": 0.002113575639668852, "clip_ratio/high_mean": 0.0007228144677355886, "clip_ratio/low_mean": 0.0007131807924452005, "clip_ratio/low_min": 1.3513513295038138e-05, "clip_ratio/region_mean": 0.0014359952438098844, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3617.0, "completions/mean_length": 825.2142944335938, "completions/mean_terminated_length": 548.029052734375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.223906705539359, "grad_norm": 0.21293967962265015, "learning_rate": 1e-06, "loss": -0.022, "num_tokens": 321686406.0, "reward": 0.660714328289032, "reward_std": 0.17539508640766144, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 559 }, { "clip_ratio/high_max": 0.001891877464913705, "clip_ratio/high_mean": 0.0005734050190540074, "clip_ratio/low_mean": 0.00043106369594170246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001004468711471418, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2605.0, "completions/mean_length": 864.3995971679688, "completions/mean_terminated_length": 543.2221069335938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 5.233236151603498, "grad_norm": 0.1860976368188858, "learning_rate": 1e-06, "loss": -0.0182, "num_tokens": 322218852.0, "reward": 0.5680803656578064, "reward_std": 0.14260178804397583, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200122833252, "step": 560 }, { "clip_ratio/high_max": 0.0017772664286894724, "clip_ratio/high_mean": 0.0006365105400618631, "clip_ratio/low_mean": 0.00041055485871765995, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010470654087839648, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3952.0, "completions/mean_length": 829.6317138671875, "completions/mean_terminated_length": 569.8964233398438, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 5.242565597667639, "grad_norm": 0.18890196084976196, "learning_rate": 1e-06, "loss": -0.025, "num_tokens": 322782994.0, "reward": 0.6674107313156128, "reward_std": 0.14658337831497192, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 561 }, { "clip_ratio/high_max": 0.0021052542360848747, "clip_ratio/high_mean": 0.0008074982433754485, "clip_ratio/low_mean": 0.0006057855507606291, "clip_ratio/low_min": 1.793400224414654e-05, "clip_ratio/region_mean": 0.0014132837823126465, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2602.0, "completions/mean_length": 794.700927734375, "completions/mean_terminated_length": 545.0227661132812, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 5.251895043731778, "grad_norm": 0.2046925127506256, "learning_rate": 1e-06, "loss": -0.0069, "num_tokens": 323322318.0, "reward": 0.6026785969734192, "reward_std": 0.15965674817562103, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 562 }, { "clip_ratio/high_max": 0.001974537288333522, "clip_ratio/high_mean": 0.0007292029949894641, "clip_ratio/low_mean": 0.0004600559491336753, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001189258968224749, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3237.0, "completions/mean_length": 850.5636596679688, "completions/mean_terminated_length": 558.3953857421875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 5.261224489795918, "grad_norm": 0.2143038660287857, "learning_rate": 1e-06, "loss": -0.0708, "num_tokens": 323866591.0, "reward": 0.6004464626312256, "reward_std": 0.1802852600812912, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 563 }, { "clip_ratio/high_max": 0.0020482320796872955, "clip_ratio/high_mean": 0.0007716327954767621, "clip_ratio/low_mean": 0.0005118950275573297, "clip_ratio/low_min": 3.246057531214319e-05, "clip_ratio/region_mean": 0.0012835278248530813, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3789.0, "completions/mean_length": 850.286865234375, "completions/mean_terminated_length": 604.8126831054688, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 5.270553935860058, "grad_norm": 0.28832581639289856, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 324470048.0, "reward": 0.5803571939468384, "reward_std": 0.17213143408298492, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761425971985, "step": 564 }, { "clip_ratio/high_max": 0.0017895134296850301, "clip_ratio/high_mean": 0.0006924274675839115, "clip_ratio/low_mean": 0.0006338041748676915, "clip_ratio/low_min": 1.4541647033183835e-05, "clip_ratio/region_mean": 0.0013262316351756454, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1643.0, "completions/mean_length": 833.3750610351562, "completions/mean_terminated_length": 530.9853515625, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 5.279883381924198, "grad_norm": 0.2057003527879715, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 324996464.0, "reward": 0.5970982313156128, "reward_std": 0.16330061852931976, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 565 }, { "clip_ratio/high_max": 0.0017614452117413748, "clip_ratio/high_mean": 0.0005621731861538137, "clip_ratio/low_mean": 0.00044188695392222144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010040601300715934, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3079.0, "completions/mean_length": 788.146240234375, "completions/mean_terminated_length": 533.6959228515625, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 5.289212827988338, "grad_norm": 0.19286105036735535, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 325526403.0, "reward": 0.6417410969734192, "reward_std": 0.13505050539970398, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 566 }, { "clip_ratio/high_max": 0.0020094738574698567, "clip_ratio/high_mean": 0.0007397602312266827, "clip_ratio/low_mean": 0.0005216166644004261, "clip_ratio/low_min": 3.568305510270875e-05, "clip_ratio/region_mean": 0.0012613769104063977, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 818.1864013671875, "completions/mean_terminated_length": 548.9939575195312, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 5.298542274052478, "grad_norm": 0.21803763508796692, "learning_rate": 1e-06, "loss": -0.0207, "num_tokens": 326066434.0, "reward": 0.6149553656578064, "reward_std": 0.1800595372915268, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 567 }, { "clip_ratio/high_max": 0.002007713159400737, "clip_ratio/high_mean": 0.0006402884300769074, "clip_ratio/low_mean": 0.0006231778515939368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012634662925847806, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2609.0, "completions/mean_length": 856.6763916015625, "completions/mean_terminated_length": 539.0955810546875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 5.307871720116618, "grad_norm": 0.20887604355812073, "learning_rate": 1e-06, "loss": -0.0399, "num_tokens": 326593096.0, "reward": 0.609375, "reward_std": 0.14789538085460663, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 568 }, { "clip_ratio/high_max": 0.001692012679995969, "clip_ratio/high_mean": 0.0005945125139987795, "clip_ratio/low_mean": 0.0004166983571849414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010112109048350248, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3603.0, "completions/mean_length": 770.3939819335938, "completions/mean_terminated_length": 565.4988403320312, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 5.317201166180758, "grad_norm": 0.2038104236125946, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 327162633.0, "reward": 0.6205357313156128, "reward_std": 0.14004099369049072, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 569 }, { "clip_ratio/high_max": 0.0015329128054872854, "clip_ratio/high_mean": 0.0005875737606402254, "clip_ratio/low_mean": 0.0004885954831479467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010761692301457515, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4006.0, "completions/mean_length": 817.1563110351562, "completions/mean_terminated_length": 560.6883544921875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 5.326530612244898, "grad_norm": 0.19578103721141815, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 327713533.0, "reward": 0.5970982313156128, "reward_std": 0.14789608120918274, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 570 }, { "clip_ratio/high_max": 0.0017812917576520704, "clip_ratio/high_mean": 0.0007292654245247832, "clip_ratio/low_mean": 0.0005467043170028774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012759697419824079, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3161.0, "completions/mean_length": 817.0814819335938, "completions/mean_terminated_length": 552.0784301757812, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 5.335860058309038, "grad_norm": 0.317152202129364, "learning_rate": 1e-06, "loss": -0.0325, "num_tokens": 328253430.0, "reward": 0.6618303656578064, "reward_std": 0.16134923696517944, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 571 }, { "clip_ratio/high_max": 0.0013462476381391753, "clip_ratio/high_mean": 0.0004718451064036344, "clip_ratio/low_mean": 0.0002711944046041026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007430395289702574, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3398.0, "completions/mean_length": 737.3717041015625, "completions/mean_terminated_length": 543.07080078125, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 5.345189504373177, "grad_norm": 0.20440132915973663, "learning_rate": 1e-06, "loss": -0.0282, "num_tokens": 328806099.0, "reward": 0.5881696939468384, "reward_std": 0.11280439049005508, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 572 }, { "clip_ratio/high_max": 0.0013336496558622457, "clip_ratio/high_mean": 0.00046210142363634077, "clip_ratio/low_mean": 0.0005868909111086396, "clip_ratio/low_min": 2.3629489078302868e-05, "clip_ratio/region_mean": 0.0010489923406566959, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 726.6529541015625, "completions/mean_terminated_length": 527.5189208984375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 5.354518950437318, "grad_norm": 0.21076786518096924, "learning_rate": 1e-06, "loss": -0.0164, "num_tokens": 329346956.0, "reward": 0.6584821939468384, "reward_std": 0.13909989595413208, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 573 }, { "clip_ratio/high_max": 0.0020970545265299734, "clip_ratio/high_mean": 0.0008852681348798797, "clip_ratio/low_mean": 0.0005595254970103269, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014447936446231324, "completions/clipped_ratio": 0.056919642857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 714.9006958007812, "completions/mean_terminated_length": 510.8343200683594, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 5.363848396501457, "grad_norm": 0.22876305878162384, "learning_rate": 1e-06, "loss": -0.0158, "num_tokens": 329869163.0, "reward": 0.6082589626312256, "reward_std": 0.16468819975852966, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 574 }, { "clip_ratio/high_max": 0.0015995872017811053, "clip_ratio/high_mean": 0.0005446128234325442, "clip_ratio/low_mean": 0.0004184743111181888, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009630871209083125, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3948.0, "completions/mean_length": 825.6752319335938, "completions/mean_terminated_length": 607.653564453125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 5.373177842565598, "grad_norm": 0.20254768431186676, "learning_rate": 1e-06, "loss": -0.0189, "num_tokens": 330472072.0, "reward": 0.6004464626312256, "reward_std": 0.1584189236164093, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 575 }, { "clip_ratio/high_max": 0.0018277046874572989, "clip_ratio/high_mean": 0.0006004279775879695, "clip_ratio/low_mean": 0.0003013545033354603, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000901782475921209, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4060.0, "completions/mean_length": 817.8772583007812, "completions/mean_terminated_length": 557.2072143554688, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 5.382507288629737, "grad_norm": 0.18974773585796356, "learning_rate": 1e-06, "loss": -0.0264, "num_tokens": 331031098.0, "reward": 0.6361607313156128, "reward_std": 0.1442061960697174, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 576 }, { "clip_ratio/high_max": 0.0011318937667965656, "clip_ratio/high_mean": 0.00037870730284339515, "clip_ratio/low_mean": 0.0002643170196279243, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006430243211070774, "completions/clipped_ratio": 0.0457589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 743.2767944335938, "completions/mean_terminated_length": 582.5029296875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 5.391836734693878, "grad_norm": 0.16180872917175293, "learning_rate": 1e-06, "loss": -0.0083, "num_tokens": 331615714.0, "reward": 0.6863839626312256, "reward_std": 0.09574238210916519, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 577 }, { "clip_ratio/high_max": 0.00139545380443451, "clip_ratio/high_mean": 0.00044874311970488634, "clip_ratio/low_mean": 0.00036537688993121265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008141200014506467, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4076.0, "completions/mean_length": 854.4877319335938, "completions/mean_terminated_length": 558.3690795898438, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.401166180758017, "grad_norm": 0.15557047724723816, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 332169759.0, "reward": 0.65625, "reward_std": 0.11712227016687393, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 578 }, { "clip_ratio/high_max": 0.0019078557161265053, "clip_ratio/high_mean": 0.000699598141181923, "clip_ratio/low_mean": 0.00045237674294185126, "clip_ratio/low_min": 1.0903698239417281e-05, "clip_ratio/region_mean": 0.0011519748768478166, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2283.0, "completions/mean_length": 899.5167846679688, "completions/mean_terminated_length": 564.4968872070312, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 5.410495626822158, "grad_norm": 0.1839502453804016, "learning_rate": 1e-06, "loss": -0.0196, "num_tokens": 332721606.0, "reward": 0.598214328289032, "reward_std": 0.13996821641921997, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 579 }, { "clip_ratio/high_max": 0.001396314461089787, "clip_ratio/high_mean": 0.000474154721814557, "clip_ratio/low_mean": 0.0004216043334963615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008957590653153602, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 793.8705444335938, "completions/mean_terminated_length": 565.3222045898438, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 5.419825072886297, "grad_norm": 0.5899951457977295, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 333281082.0, "reward": 0.6662946939468384, "reward_std": 0.13984736800193787, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 580 }, { "clip_ratio/high_max": 0.001612413179827854, "clip_ratio/high_mean": 0.0005793492891825736, "clip_ratio/low_mean": 0.0004715553150163032, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001050904600560898, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4060.0, "completions/mean_length": 823.3314819335938, "completions/mean_terminated_length": 528.711669921875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 5.429154518950437, "grad_norm": 0.19468367099761963, "learning_rate": 1e-06, "loss": -0.0269, "num_tokens": 333801907.0, "reward": 0.6395089626312256, "reward_std": 0.12302455306053162, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 581 }, { "clip_ratio/high_max": 0.002196735735196853, "clip_ratio/high_mean": 0.0006915792146173771, "clip_ratio/low_mean": 0.00044164179780636914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011332209760439582, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2405.0, "completions/mean_length": 785.1808471679688, "completions/mean_terminated_length": 543.3125610351562, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 5.438483965014577, "grad_norm": 0.19301320612430573, "learning_rate": 1e-06, "loss": -0.019, "num_tokens": 334337101.0, "reward": 0.668526828289032, "reward_std": 0.15495768189430237, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 582 }, { "clip_ratio/high_max": 0.00198226571228588, "clip_ratio/high_mean": 0.0006953595129743917, "clip_ratio/low_mean": 0.0003817330175479583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001077092510968214, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3544.0, "completions/mean_length": 739.5223388671875, "completions/mean_terminated_length": 528.4982299804688, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 5.447813411078717, "grad_norm": 0.20036178827285767, "learning_rate": 1e-06, "loss": -0.0373, "num_tokens": 334865513.0, "reward": 0.668526828289032, "reward_std": 0.14996904134750366, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 583 }, { "clip_ratio/high_max": 0.0017318636273557786, "clip_ratio/high_mean": 0.000776032597059384, "clip_ratio/low_mean": 0.0004366377875157923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001212670365930535, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2489.0, "completions/mean_length": 828.739990234375, "completions/mean_terminated_length": 538.9343872070312, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 5.457142857142857, "grad_norm": 0.2247992902994156, "learning_rate": 1e-06, "loss": -0.0591, "num_tokens": 335393696.0, "reward": 0.598214328289032, "reward_std": 0.16750863194465637, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 584 }, { "clip_ratio/high_max": 0.0017781036040105391, "clip_ratio/high_mean": 0.0006188633406054578, "clip_ratio/low_mean": 0.0004684656846620783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010873290157178417, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3043.0, "completions/mean_length": 847.3873291015625, "completions/mean_terminated_length": 563.5278930664062, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 5.466472303206997, "grad_norm": 0.27033933997154236, "learning_rate": 1e-06, "loss": -0.0222, "num_tokens": 335955011.0, "reward": 0.5948660969734192, "reward_std": 0.14027202129364014, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 585 }, { "clip_ratio/high_max": 0.0020864395082753617, "clip_ratio/high_mean": 0.0009011084002850112, "clip_ratio/low_mean": 0.0005780601268270402, "clip_ratio/low_min": 1.5375153452623636e-05, "clip_ratio/region_mean": 0.0014791684989177156, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3285.0, "completions/mean_length": 784.7332763671875, "completions/mean_terminated_length": 534.3013305664062, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 5.475801749271137, "grad_norm": 0.23711398243904114, "learning_rate": 1e-06, "loss": -0.0469, "num_tokens": 336487820.0, "reward": 0.652901828289032, "reward_std": 0.17581765353679657, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 586 }, { "clip_ratio/high_max": 0.001547319370729383, "clip_ratio/high_mean": 0.0004892838705927716, "clip_ratio/low_mean": 0.00044836341567133786, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000937647293540067, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3966.0, "completions/mean_length": 845.9564819335938, "completions/mean_terminated_length": 604.3465576171875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 5.485131195335277, "grad_norm": 0.4471912980079651, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 337090461.0, "reward": 0.578125, "reward_std": 0.1476692408323288, "rewards/verify_math_reward/mean": 0.578125, "rewards/verify_math_reward/std": 0.4941346049308777, "step": 587 }, { "clip_ratio/high_max": 0.0018884771052398719, "clip_ratio/high_mean": 0.0006595084778382443, "clip_ratio/low_mean": 0.0005453445792227285, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012048530406900682, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3647.0, "completions/mean_length": 904.1495971679688, "completions/mean_terminated_length": 586.9226684570312, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 5.494460641399417, "grad_norm": 0.2929361164569855, "learning_rate": 1e-06, "loss": -0.0177, "num_tokens": 337666475.0, "reward": 0.5915178656578064, "reward_std": 0.16694730520248413, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 588 }, { "clip_ratio/high_max": 0.002092951541271759, "clip_ratio/high_mean": 0.000775908210016496, "clip_ratio/low_mean": 0.0004078817446497851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011837899546662811, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1752.0, "completions/mean_length": 783.6585083007812, "completions/mean_terminated_length": 550.1719970703125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 5.503790087463557, "grad_norm": 0.2266514003276825, "learning_rate": 1e-06, "loss": -0.0338, "num_tokens": 338212625.0, "reward": 0.6662946939468384, "reward_std": 0.14951369166374207, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 589 }, { "clip_ratio/high_max": 0.001805393349059159, "clip_ratio/high_mean": 0.0005829860820085742, "clip_ratio/low_mean": 0.00044519790480990196, "clip_ratio/low_min": 1.721763146633748e-05, "clip_ratio/region_mean": 0.0010281839713570662, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 754.6797485351562, "completions/mean_terminated_length": 523.4188842773438, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 5.513119533527696, "grad_norm": 0.19441890716552734, "learning_rate": 1e-06, "loss": -0.0329, "num_tokens": 338734970.0, "reward": 0.6417410969734192, "reward_std": 0.13598664104938507, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 590 }, { "clip_ratio/high_max": 0.0019316535326652229, "clip_ratio/high_mean": 0.0005992088181301369, "clip_ratio/low_mean": 0.0006648907292401418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012640995591937099, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3237.0, "completions/mean_length": 899.7567138671875, "completions/mean_terminated_length": 564.7620239257812, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 5.522448979591837, "grad_norm": 0.2167060673236847, "learning_rate": 1e-06, "loss": -0.0185, "num_tokens": 339289072.0, "reward": 0.6037946939468384, "reward_std": 0.15026073157787323, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 591 }, { "clip_ratio/high_max": 0.0017859246545413043, "clip_ratio/high_mean": 0.000552656611034763, "clip_ratio/low_mean": 0.00042353591743449215, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009761925284692552, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3097.0, "completions/mean_length": 781.654052734375, "completions/mean_terminated_length": 552.2601928710938, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.531778425655976, "grad_norm": 0.1835176795721054, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 339853098.0, "reward": 0.6149553656578064, "reward_std": 0.14530275762081146, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 592 }, { "clip_ratio/high_max": 0.001817769731133012, "clip_ratio/high_mean": 0.0006622924884140957, "clip_ratio/low_mean": 0.0006602996272704331, "clip_ratio/low_min": 1.0310128345736302e-05, "clip_ratio/region_mean": 0.0013225921247794759, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3932.0, "completions/mean_length": 812.091552734375, "completions/mean_terminated_length": 546.6851806640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 5.541107871720117, "grad_norm": 0.22387462854385376, "learning_rate": 1e-06, "loss": -0.019, "num_tokens": 340402620.0, "reward": 0.6383928656578064, "reward_std": 0.14617151021957397, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 593 }, { "clip_ratio/high_max": 0.0020483315347519238, "clip_ratio/high_mean": 0.000892381542143994, "clip_ratio/low_mean": 0.0006209309067344293, "clip_ratio/low_min": 1.291589160246076e-05, "clip_ratio/region_mean": 0.001513312501629116, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3277.0, "completions/mean_length": 921.42529296875, "completions/mean_terminated_length": 601.6277465820312, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.550437317784256, "grad_norm": 0.20755481719970703, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 340976921.0, "reward": 0.6149553656578064, "reward_std": 0.17430990934371948, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 594 }, { "clip_ratio/high_max": 0.0022864980783197097, "clip_ratio/high_mean": 0.0007460992619598983, "clip_ratio/low_mean": 0.0007244940734381089, "clip_ratio/low_min": 9.901774319587275e-06, "clip_ratio/region_mean": 0.0014705933426739648, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4080.0, "completions/mean_length": 825.6038208007812, "completions/mean_terminated_length": 574.0348510742188, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 5.559766763848397, "grad_norm": 0.27528148889541626, "learning_rate": 1e-06, "loss": -0.0221, "num_tokens": 341561558.0, "reward": 0.5491071939468384, "reward_std": 0.1669868528842926, "rewards/verify_math_reward/mean": 0.5491071343421936, "rewards/verify_math_reward/std": 0.49786055088043213, "step": 595 }, { "clip_ratio/high_max": 0.00207165781466756, "clip_ratio/high_mean": 0.0009048133542819414, "clip_ratio/low_mean": 0.0005004548697797873, "clip_ratio/low_min": 1.8105445633409545e-05, "clip_ratio/region_mean": 0.0014052681763132568, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3538.0, "completions/mean_length": 840.622802734375, "completions/mean_terminated_length": 547.5596313476562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 5.569096209912536, "grad_norm": 0.2678869962692261, "learning_rate": 1e-06, "loss": -0.0347, "num_tokens": 342087452.0, "reward": 0.6328125, "reward_std": 0.17893162369728088, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 596 }, { "clip_ratio/high_max": 0.001691892248345539, "clip_ratio/high_mean": 0.0005671769349646638, "clip_ratio/low_mean": 0.00044722312645717466, "clip_ratio/low_min": 1.7375590687151998e-05, "clip_ratio/region_mean": 0.0010144000789296115, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3886.0, "completions/mean_length": 720.739990234375, "completions/mean_terminated_length": 550.592041015625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.578425655976677, "grad_norm": 0.21810497343540192, "learning_rate": 1e-06, "loss": -0.0115, "num_tokens": 342634691.0, "reward": 0.6718750596046448, "reward_std": 0.13035649061203003, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 597 }, { "clip_ratio/high_max": 0.0018793922354234383, "clip_ratio/high_mean": 0.0006814006374042947, "clip_ratio/low_mean": 0.0004309818004912813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011123824297101237, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3201.0, "completions/mean_length": 857.7578735351562, "completions/mean_terminated_length": 566.2372436523438, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 5.587755102040816, "grad_norm": 0.2967858910560608, "learning_rate": 1e-06, "loss": -0.035, "num_tokens": 343184650.0, "reward": 0.606026828289032, "reward_std": 0.1594373881816864, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 598 }, { "clip_ratio/high_max": 0.0017086845764424652, "clip_ratio/high_mean": 0.000784298299549846, "clip_ratio/low_mean": 0.0006106574583100155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013949557906016707, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3116.0, "completions/mean_length": 908.1138916015625, "completions/mean_terminated_length": 552.1464233398438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 5.597084548104956, "grad_norm": 0.22466321289539337, "learning_rate": 1e-06, "loss": -0.0288, "num_tokens": 343721272.0, "reward": 0.5736607313156128, "reward_std": 0.17878004908561707, "rewards/verify_math_reward/mean": 0.5736607313156128, "rewards/verify_math_reward/std": 0.4948205351829529, "step": 599 }, { "clip_ratio/high_max": 0.0017492636834504083, "clip_ratio/high_mean": 0.0007136720123526175, "clip_ratio/low_mean": 0.0005256916329017258, "clip_ratio/low_min": 1.220703143189894e-05, "clip_ratio/region_mean": 0.0012393636534397956, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 899.51904296875, "completions/mean_terminated_length": 594.7200927734375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 5.606413994169096, "grad_norm": 0.18881264328956604, "learning_rate": 1e-06, "loss": -0.0613, "num_tokens": 344295065.0, "reward": 0.613839328289032, "reward_std": 0.1774245798587799, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 600 }, { "clip_ratio/high_max": 0.001968203840078786, "clip_ratio/high_mean": 0.0008214008994400501, "clip_ratio/low_mean": 0.0005234422815192374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001344843167316867, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3169.0, "completions/mean_length": 763.0636596679688, "completions/mean_terminated_length": 553.5195922851562, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 5.615743440233236, "grad_norm": 0.22382906079292297, "learning_rate": 1e-06, "loss": -0.0056, "num_tokens": 344850122.0, "reward": 0.668526828289032, "reward_std": 0.1704043447971344, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 601 }, { "clip_ratio/high_max": 0.0020664919356931932, "clip_ratio/high_mean": 0.0007152585403673584, "clip_ratio/low_mean": 0.0005268582699500257, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012421168466971722, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 748.1529541015625, "completions/mean_terminated_length": 554.4757690429688, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.625072886297376, "grad_norm": 0.21465174853801727, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 345399363.0, "reward": 0.6495535969734192, "reward_std": 0.16683463752269745, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 602 }, { "clip_ratio/high_max": 0.001694723789114505, "clip_ratio/high_mean": 0.0006381245766533539, "clip_ratio/low_mean": 0.0003428182244533673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009809427901927847, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2273.0, "completions/mean_length": 797.8717041015625, "completions/mean_terminated_length": 548.433349609375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.634402332361516, "grad_norm": 0.23483262956142426, "learning_rate": 1e-06, "loss": -0.0313, "num_tokens": 345950984.0, "reward": 0.7042410969734192, "reward_std": 0.13936907052993774, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 603 }, { "clip_ratio/high_max": 0.0015487017008126713, "clip_ratio/high_mean": 0.0007026943740129354, "clip_ratio/low_mean": 0.0004674681936194247, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011701625771820545, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 799.7188110351562, "completions/mean_terminated_length": 567.3643798828125, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 5.643731778425656, "grad_norm": 0.21795716881752014, "learning_rate": 1e-06, "loss": -0.0373, "num_tokens": 346512308.0, "reward": 0.6551339626312256, "reward_std": 0.16566641628742218, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 604 }, { "clip_ratio/high_max": 0.0023101557744666934, "clip_ratio/high_mean": 0.0008812618871161249, "clip_ratio/low_mean": 0.0005640503950417042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014453122712438926, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3197.0, "completions/mean_length": 843.5011596679688, "completions/mean_terminated_length": 567.8656616210938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 5.653061224489796, "grad_norm": 0.2422908991575241, "learning_rate": 1e-06, "loss": -0.0452, "num_tokens": 347070853.0, "reward": 0.637276828289032, "reward_std": 0.19467879831790924, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 605 }, { "clip_ratio/high_max": 0.0016240206314250827, "clip_ratio/high_mean": 0.0006713330221828073, "clip_ratio/low_mean": 0.0004585997867252445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001129932796175126, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2902.0, "completions/mean_length": 791.4799194335938, "completions/mean_terminated_length": 545.8201293945312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.662390670553936, "grad_norm": 0.1954859495162964, "learning_rate": 1e-06, "loss": -0.0178, "num_tokens": 347613251.0, "reward": 0.6640625, "reward_std": 0.15631458163261414, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 606 }, { "clip_ratio/high_max": 0.0017178686139232013, "clip_ratio/high_mean": 0.0005973039187665563, "clip_ratio/low_mean": 0.0004332460266596172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010305499399692053, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3724.0, "completions/mean_length": 808.7645263671875, "completions/mean_terminated_length": 547.369873046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.671720116618076, "grad_norm": 0.18111705780029297, "learning_rate": 1e-06, "loss": -0.0263, "num_tokens": 348154360.0, "reward": 0.6863839626312256, "reward_std": 0.126701220870018, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 607 }, { "clip_ratio/high_max": 0.0017496866712463088, "clip_ratio/high_mean": 0.0007021832898317371, "clip_ratio/low_mean": 0.00041005166713148355, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011122349460492842, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3501.0, "completions/mean_length": 704.2355346679688, "completions/mean_terminated_length": 520.68115234375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 5.681049562682215, "grad_norm": 0.21597105264663696, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 348691003.0, "reward": 0.6897321939468384, "reward_std": 0.14282116293907166, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 608 }, { "clip_ratio/high_max": 0.0018038047419395298, "clip_ratio/high_mean": 0.0006084399683459196, "clip_ratio/low_mean": 0.0006828202385804616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001291260192374466, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2171.0, "completions/mean_length": 859.4699096679688, "completions/mean_terminated_length": 563.8063354492188, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 5.690379008746356, "grad_norm": 0.1902514398097992, "learning_rate": 1e-06, "loss": -0.0257, "num_tokens": 349243272.0, "reward": 0.59375, "reward_std": 0.15965536236763, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 609 }, { "clip_ratio/high_max": 0.00212705120065948, "clip_ratio/high_mean": 0.0007869412202126114, "clip_ratio/low_mean": 0.0005006231767765712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012875644024461508, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3585.0, "completions/mean_length": 729.0658569335938, "completions/mean_terminated_length": 508.87396240234375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 5.699708454810495, "grad_norm": 0.21717499196529388, "learning_rate": 1e-06, "loss": -0.0257, "num_tokens": 349762723.0, "reward": 0.6964285969734192, "reward_std": 0.1497408002614975, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 610 }, { "clip_ratio/high_max": 0.0014468907320406288, "clip_ratio/high_mean": 0.0005383249972510384, "clip_ratio/low_mean": 0.00037257574604154797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009109007332881447, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 809.4074096679688, "completions/mean_terminated_length": 548.0638427734375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 5.709037900874636, "grad_norm": 0.16854842007160187, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 350313016.0, "reward": 0.6462053656578064, "reward_std": 0.14233079552650452, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 611 }, { "clip_ratio/high_max": 0.0016304294695146382, "clip_ratio/high_mean": 0.0006607993400393752, "clip_ratio/low_mean": 0.0004933656573484768, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011541649982973468, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 855.9598388671875, "completions/mean_terminated_length": 568.5686645507812, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 5.718367346938775, "grad_norm": 0.21609733998775482, "learning_rate": 1e-06, "loss": -0.0412, "num_tokens": 350880596.0, "reward": 0.6584821939468384, "reward_std": 0.17258423566818237, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 612 }, { "clip_ratio/high_max": 0.0017601579238544218, "clip_ratio/high_mean": 0.0005464939931698609, "clip_ratio/low_mean": 0.0002638160899550712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008103100808511954, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3793.0, "completions/mean_length": 862.3627319335938, "completions/mean_terminated_length": 536.615478515625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 5.727696793002916, "grad_norm": 0.2475437968969345, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 351407057.0, "reward": 0.6305803656578064, "reward_std": 0.111860491335392, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 613 }, { "clip_ratio/high_max": 0.001923630596138537, "clip_ratio/high_mean": 0.000620996192083112, "clip_ratio/low_mean": 0.0006291031231739908, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012500993070716504, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3904.0, "completions/mean_length": 766.2288208007812, "completions/mean_terminated_length": 535.767333984375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 5.737026239067055, "grad_norm": 0.19469042122364044, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 351949766.0, "reward": 0.6171875, "reward_std": 0.13711459934711456, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 614 }, { "clip_ratio/high_max": 0.0018321072893741075, "clip_ratio/high_mean": 0.0006474209503721795, "clip_ratio/low_mean": 0.0004664251600843272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011138460904476233, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3790.0, "completions/mean_length": 779.232177734375, "completions/mean_terminated_length": 541.1865844726562, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 5.746355685131196, "grad_norm": 0.2441077083349228, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 352490694.0, "reward": 0.6104910969734192, "reward_std": 0.13857996463775635, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 615 }, { "clip_ratio/high_max": 0.0018733175711531658, "clip_ratio/high_mean": 0.00071459826631326, "clip_ratio/low_mean": 0.00040857206249711453, "clip_ratio/low_min": 1.9379844161449e-05, "clip_ratio/region_mean": 0.001123170310165733, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3101.0, "completions/mean_length": 830.3772583007812, "completions/mean_terminated_length": 527.7097778320312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 5.755685131195335, "grad_norm": 0.2149052768945694, "learning_rate": 1e-06, "loss": -0.0365, "num_tokens": 353016248.0, "reward": 0.6573660969734192, "reward_std": 0.15935185551643372, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 616 }, { "clip_ratio/high_max": 0.002151643970137229, "clip_ratio/high_mean": 0.0008402461171499453, "clip_ratio/low_mean": 0.0005802615378343035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014205076768121216, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2003.0, "completions/mean_length": 798.3672485351562, "completions/mean_terminated_length": 536.1458129882812, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 5.765014577259475, "grad_norm": 0.3056844174861908, "learning_rate": 1e-06, "loss": -0.0375, "num_tokens": 353547585.0, "reward": 0.6149553656578064, "reward_std": 0.16653333604335785, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 617 }, { "clip_ratio/high_max": 0.001931530590809416, "clip_ratio/high_mean": 0.0007804630440659821, "clip_ratio/low_mean": 0.0004457067543626181, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012261698029760737, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 836.1741333007812, "completions/mean_terminated_length": 576.9590454101562, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 5.774344023323615, "grad_norm": 0.4081643521785736, "learning_rate": 1e-06, "loss": -0.0382, "num_tokens": 354110749.0, "reward": 0.6629464626312256, "reward_std": 0.1755470633506775, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 618 }, { "clip_ratio/high_max": 0.0021630388000630774, "clip_ratio/high_mean": 0.0008867753840604564, "clip_ratio/low_mean": 0.0004932618467137218, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013800371889374219, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3079.0, "completions/mean_length": 838.5480346679688, "completions/mean_terminated_length": 540.9732055664062, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 5.783673469387755, "grad_norm": 0.25812169909477234, "learning_rate": 1e-06, "loss": -0.0163, "num_tokens": 354644392.0, "reward": 0.6194196939468384, "reward_std": 0.1750592142343521, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 619 }, { "clip_ratio/high_max": 0.0016750669856264722, "clip_ratio/high_mean": 0.0006418176044462598, "clip_ratio/low_mean": 0.00045677239268115954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010985900153173134, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2526.0, "completions/mean_length": 752.2076416015625, "completions/mean_terminated_length": 512.2224731445312, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.793002915451895, "grad_norm": 0.23503591120243073, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 355169490.0, "reward": 0.6774553656578064, "reward_std": 0.147894948720932, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 620 }, { "clip_ratio/high_max": 0.0017263800364162307, "clip_ratio/high_mean": 0.000655078043564572, "clip_ratio/low_mean": 0.0004387635049170058, "clip_ratio/low_min": 2.0868114006589167e-05, "clip_ratio/region_mean": 0.001093841539841378, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 802.0569458007812, "completions/mean_terminated_length": 569.8673706054688, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 5.802332361516035, "grad_norm": 0.21068450808525085, "learning_rate": 1e-06, "loss": -0.0353, "num_tokens": 355733549.0, "reward": 0.6261160969734192, "reward_std": 0.1429726928472519, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 621 }, { "clip_ratio/high_max": 0.002015686441154685, "clip_ratio/high_mean": 0.0008106550358206732, "clip_ratio/low_mean": 0.0005287678868626244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013394228735705838, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3426.0, "completions/mean_length": 768.6082763671875, "completions/mean_terminated_length": 559.412841796875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 5.811661807580175, "grad_norm": 0.2241523116827011, "learning_rate": 1e-06, "loss": -0.0281, "num_tokens": 356289342.0, "reward": 0.6752232313156128, "reward_std": 0.178443044424057, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 622 }, { "clip_ratio/high_max": 0.0019235800136812031, "clip_ratio/high_mean": 0.000648291599645745, "clip_ratio/low_mean": 0.0004592416796640464, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001107533294998575, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2706.0, "completions/mean_length": 912.388427734375, "completions/mean_terminated_length": 600.2696533203125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 5.820991253644315, "grad_norm": 0.19351865351200104, "learning_rate": 1e-06, "loss": -0.0436, "num_tokens": 356879562.0, "reward": 0.6149553656578064, "reward_std": 0.15221214294433594, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 623 }, { "clip_ratio/high_max": 0.0015534516278421506, "clip_ratio/high_mean": 0.0005410439616753138, "clip_ratio/low_mean": 0.0003709982795498945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009120422218984459, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3713.0, "completions/mean_length": 906.0636596679688, "completions/mean_terminated_length": 601.8887939453125, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.830320699708455, "grad_norm": 0.19623930752277374, "learning_rate": 1e-06, "loss": -0.0474, "num_tokens": 357473683.0, "reward": 0.5959821939468384, "reward_std": 0.1352352499961853, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 624 }, { "clip_ratio/high_max": 0.0018489718095224816, "clip_ratio/high_mean": 0.0007639082032255828, "clip_ratio/low_mean": 0.00044334438871374005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012072525569237769, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 826.9285888671875, "completions/mean_terminated_length": 532.6326293945312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 5.839650145772595, "grad_norm": 0.19132301211357117, "learning_rate": 1e-06, "loss": -0.0527, "num_tokens": 357991139.0, "reward": 0.7042410969734192, "reward_std": 0.1562379002571106, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 625 }, { "clip_ratio/high_max": 0.001652845643548062, "clip_ratio/high_mean": 0.0005679082350980025, "clip_ratio/low_mean": 0.00034982249826498446, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009177307219943032, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3818.0, "completions/mean_length": 694.943115234375, "completions/mean_terminated_length": 510.8858642578125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 5.848979591836734, "grad_norm": 0.1636369377374649, "learning_rate": 1e-06, "loss": -0.0049, "num_tokens": 358505368.0, "reward": 0.6640625, "reward_std": 0.11404222249984741, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 626 }, { "clip_ratio/high_max": 0.001495146207162179, "clip_ratio/high_mean": 0.0005264190381240041, "clip_ratio/low_mean": 0.0005458227988128783, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010722418483055662, "completions/clipped_ratio": 0.0558035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3368.0, "completions/mean_length": 749.0803833007812, "completions/mean_terminated_length": 551.2718505859375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 5.858309037900875, "grad_norm": 0.2107505351305008, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 359066704.0, "reward": 0.6741071939468384, "reward_std": 0.13665854930877686, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 627 }, { "clip_ratio/high_max": 0.0023203105356515152, "clip_ratio/high_mean": 0.0006591551582459942, "clip_ratio/low_mean": 0.0005891631481063087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012483183018048294, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3372.0, "completions/mean_length": 810.544677734375, "completions/mean_terminated_length": 570.5293579101562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 5.867638483965014, "grad_norm": 0.21155844628810883, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 359623400.0, "reward": 0.640625, "reward_std": 0.147451713681221, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 628 }, { "clip_ratio/high_max": 0.001622506413696101, "clip_ratio/high_mean": 0.0005611461847365717, "clip_ratio/low_mean": 0.00047974971175790415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001040895891492255, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3044.0, "completions/mean_length": 802.2098388671875, "completions/mean_terminated_length": 540.2940063476562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 5.876967930029155, "grad_norm": 0.20598623156547546, "learning_rate": 1e-06, "loss": -0.0319, "num_tokens": 360169964.0, "reward": 0.645089328289032, "reward_std": 0.13534514605998993, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 629 }, { "clip_ratio/high_max": 0.002303055392985698, "clip_ratio/high_mean": 0.0008456104351353133, "clip_ratio/low_mean": 0.0005595912602984754, "clip_ratio/low_min": 2.916013909270987e-05, "clip_ratio/region_mean": 0.0014052016849745996, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2753.0, "completions/mean_length": 754.3928833007812, "completions/mean_terminated_length": 548.5118408203125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 5.886297376093294, "grad_norm": 0.2988147735595703, "learning_rate": 1e-06, "loss": -0.0141, "num_tokens": 360724940.0, "reward": 0.6506696939468384, "reward_std": 0.17063213884830475, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 630 }, { "clip_ratio/high_max": 0.0016552548222534824, "clip_ratio/high_mean": 0.0004942677851431654, "clip_ratio/low_mean": 0.0004892132105851488, "clip_ratio/low_min": 4.0523016650695354e-05, "clip_ratio/region_mean": 0.0009834810116444714, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 763.2053833007812, "completions/mean_terminated_length": 545.2461547851562, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 5.895626822157435, "grad_norm": 0.1725149303674698, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 361271180.0, "reward": 0.6328125, "reward_std": 0.13226650655269623, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 631 }, { "clip_ratio/high_max": 0.0024809171445667744, "clip_ratio/high_mean": 0.0009537415789964143, "clip_ratio/low_mean": 0.0006145953857412678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015683369456382934, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 890.0100708007812, "completions/mean_terminated_length": 567.0479125976562, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 5.904956268221574, "grad_norm": 0.24993817508220673, "learning_rate": 1e-06, "loss": -0.0498, "num_tokens": 361829709.0, "reward": 0.5970982313156128, "reward_std": 0.18614406883716583, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 632 }, { "clip_ratio/high_max": 0.001738108621793799, "clip_ratio/high_mean": 0.0007113383526302641, "clip_ratio/low_mean": 0.00046371721600735327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011750555713661015, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3509.0, "completions/mean_length": 714.6016235351562, "completions/mean_terminated_length": 544.1441650390625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 5.914285714285715, "grad_norm": 0.20464277267456055, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 362374872.0, "reward": 0.660714328289032, "reward_std": 0.15033671259880066, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 633 }, { "clip_ratio/high_max": 0.0019651527181849815, "clip_ratio/high_mean": 0.0007409344379993854, "clip_ratio/low_mean": 0.0006084804099373287, "clip_ratio/low_min": 3.630906212492846e-05, "clip_ratio/region_mean": 0.0013494148734025657, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3855.0, "completions/mean_length": 790.779052734375, "completions/mean_terminated_length": 557.7944946289062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 5.923615160349854, "grad_norm": 0.24859444797039032, "learning_rate": 1e-06, "loss": -0.0346, "num_tokens": 362933986.0, "reward": 0.6584821939468384, "reward_std": 0.16792160272598267, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 634 }, { "clip_ratio/high_max": 0.002345296394196339, "clip_ratio/high_mean": 0.0009953158478310797, "clip_ratio/low_mean": 0.0004788298765561194, "clip_ratio/low_min": 1.7615557226235978e-05, "clip_ratio/region_mean": 0.0014741457271156833, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 945.5547485351562, "completions/mean_terminated_length": 558.6578979492188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 5.932944606413994, "grad_norm": 0.24806180596351624, "learning_rate": 1e-06, "loss": -0.0533, "num_tokens": 363477195.0, "reward": 0.6328125, "reward_std": 0.1837027221918106, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 635 }, { "clip_ratio/high_max": 0.0017773505969671533, "clip_ratio/high_mean": 0.0005512566249308293, "clip_ratio/low_mean": 0.0004607334658430773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010119900944118854, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3295.0, "completions/mean_length": 843.6998291015625, "completions/mean_terminated_length": 572.3470458984375, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 5.942274052478134, "grad_norm": 0.19955603778362274, "learning_rate": 1e-06, "loss": -0.0303, "num_tokens": 364037646.0, "reward": 0.6328125, "reward_std": 0.13444572687149048, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 636 }, { "clip_ratio/high_max": 0.002003154790145345, "clip_ratio/high_mean": 0.0007183063389675226, "clip_ratio/low_mean": 0.0006264768635446671, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013447832097881474, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2371.0, "completions/mean_length": 830.1897583007812, "completions/mean_terminated_length": 536.1873168945312, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 5.9516034985422746, "grad_norm": 0.19695554673671722, "learning_rate": 1e-06, "loss": -0.0261, "num_tokens": 364574120.0, "reward": 0.6015625, "reward_std": 0.1510944366455078, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 637 }, { "clip_ratio/high_max": 0.002064436566797667, "clip_ratio/high_mean": 0.0007059309355099685, "clip_ratio/low_mean": 0.0004430554208738613, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011489863572933245, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3456.0, "completions/mean_length": 943.9933471679688, "completions/mean_terminated_length": 609.3358154296875, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 5.960932944606414, "grad_norm": 0.22147022187709808, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 365163418.0, "reward": 0.6037946939468384, "reward_std": 0.15405866503715515, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 638 }, { "clip_ratio/high_max": 0.0015090778506419156, "clip_ratio/high_mean": 0.0004944426536894753, "clip_ratio/low_mean": 0.0003357936457177857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008302363039547345, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 789.9285888671875, "completions/mean_terminated_length": 544.1535034179688, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 5.970262390670554, "grad_norm": 0.1750473976135254, "learning_rate": 1e-06, "loss": -0.0119, "num_tokens": 365708458.0, "reward": 0.6305803656578064, "reward_std": 0.10941943526268005, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.48291724920272827, "step": 639 }, { "clip_ratio/high_max": 0.0016332373525074217, "clip_ratio/high_mean": 0.0005789284296042752, "clip_ratio/low_mean": 0.0004030867044093611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009820151553867618, "completions/clipped_ratio": 0.052455357142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3506.0, "completions/mean_length": 705.8516235351562, "completions/mean_terminated_length": 518.175537109375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 5.979591836734694, "grad_norm": 0.4237379729747772, "learning_rate": 1e-06, "loss": -0.0185, "num_tokens": 366228629.0, "reward": 0.6584821939468384, "reward_std": 0.13406775891780853, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 640 }, { "clip_ratio/high_max": 0.001970581157365814, "clip_ratio/high_mean": 0.0007922044205770362, "clip_ratio/low_mean": 0.0006420257413992658, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014342301474243868, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3675.0, "completions/mean_length": 951.9207763671875, "completions/mean_terminated_length": 592.1505126953125, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 5.988921282798834, "grad_norm": 0.2584664225578308, "learning_rate": 1e-06, "loss": -0.0507, "num_tokens": 366804686.0, "reward": 0.5569196939468384, "reward_std": 0.17942126095294952, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.49702703952789307, "step": 641 }, { "clip_ratio/high_max": 0.0016068845870904624, "clip_ratio/high_mean": 0.0004907044321953435, "clip_ratio/low_mean": 0.00035171221770724514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008424166571785463, "completions/clipped_ratio": 0.08238636363636365, "completions/max_length": 4096.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 860.1875, "completions/mean_terminated_length": 569.6655883789062, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 5.998250728862974, "grad_norm": 0.21397212147712708, "learning_rate": 1e-06, "loss": -0.0036, "num_tokens": 367330884.0, "reward": 0.637276828289032, "reward_std": 0.11404110491275787, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 642 }, { "clip_ratio/high_max": 0.002405567061941838, "clip_ratio/high_mean": 0.0008714211853657616, "clip_ratio/low_mean": 0.0004944297033944167, "clip_ratio/low_min": 1.0056315659312531e-05, "clip_ratio/region_mean": 0.0013658508541993797, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3641.0, "completions/mean_length": 829.6172485351562, "completions/mean_terminated_length": 578.3569946289062, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 6.0093294460641395, "grad_norm": 0.23643708229064941, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 367917021.0, "reward": 0.6383928656578064, "reward_std": 0.15782159566879272, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 643 }, { "clip_ratio/high_max": 0.001965926945558749, "clip_ratio/high_mean": 0.0008047975861700252, "clip_ratio/low_mean": 0.0005486659656526172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013534635691030417, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 764.6763916015625, "completions/mean_terminated_length": 508.4206848144531, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 6.01865889212828, "grad_norm": 0.26358529925346375, "learning_rate": 1e-06, "loss": -0.0405, "num_tokens": 368417643.0, "reward": 0.6852678656578064, "reward_std": 0.18727271258831024, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 644 }, { "clip_ratio/high_max": 0.0019731192442122847, "clip_ratio/high_mean": 0.0008096116944216192, "clip_ratio/low_mean": 0.0005940697265032213, "clip_ratio/low_min": 1.8242848454974592e-05, "clip_ratio/region_mean": 0.0014036814282007981, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2381.0, "completions/mean_length": 927.7779541015625, "completions/mean_terminated_length": 552.021240234375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.0279883381924195, "grad_norm": 0.3170129060745239, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 368951340.0, "reward": 0.6473214626312256, "reward_std": 0.18066614866256714, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 645 }, { "clip_ratio/high_max": 0.0015176476808846928, "clip_ratio/high_mean": 0.0005830992067785701, "clip_ratio/low_mean": 0.0005638477869069902, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011469469936855603, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 894.935302734375, "completions/mean_terminated_length": 576.7926635742188, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.03731778425656, "grad_norm": 0.23401102423667908, "learning_rate": 1e-06, "loss": -0.0183, "num_tokens": 369522258.0, "reward": 0.5915178656578064, "reward_std": 0.151692196726799, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 646 }, { "clip_ratio/high_max": 0.0018379630200797692, "clip_ratio/high_mean": 0.0007818344874976901, "clip_ratio/low_mean": 0.0005430814962892327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013249159819679335, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3348.0, "completions/mean_length": 876.3672485351562, "completions/mean_terminated_length": 620.3482055664062, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.0466472303206995, "grad_norm": 0.24321548640727997, "learning_rate": 1e-06, "loss": -0.0147, "num_tokens": 370121483.0, "reward": 0.6350446939468384, "reward_std": 0.1859932243824005, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 647 }, { "clip_ratio/high_max": 0.0015598051140841562, "clip_ratio/high_mean": 0.000518383514645393, "clip_ratio/low_mean": 0.0004901263337160344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010085098510899115, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3214.0, "completions/mean_length": 778.114990234375, "completions/mean_terminated_length": 535.7305297851562, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 6.05597667638484, "grad_norm": 0.16377153992652893, "learning_rate": 1e-06, "loss": -0.0194, "num_tokens": 370656354.0, "reward": 0.6863839626312256, "reward_std": 0.1388038545846939, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 648 }, { "clip_ratio/high_max": 0.0016828532570798416, "clip_ratio/high_mean": 0.0006458414413827995, "clip_ratio/low_mean": 0.000329727351982001, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009755687806318747, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 813.5960083007812, "completions/mean_terminated_length": 586.4129028320312, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.0653061224489795, "grad_norm": 0.19361768662929535, "learning_rate": 1e-06, "loss": -0.0242, "num_tokens": 371235488.0, "reward": 0.6640625, "reward_std": 0.1301603466272354, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 649 }, { "clip_ratio/high_max": 0.00226844877761323, "clip_ratio/high_mean": 0.0007824697695468785, "clip_ratio/low_mean": 0.00036311576377556776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011455855274107307, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 761.9185791015625, "completions/mean_terminated_length": 522.63037109375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 6.07463556851312, "grad_norm": 0.20134110748767853, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 371763743.0, "reward": 0.6540178656578064, "reward_std": 0.11986782401800156, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 650 }, { "clip_ratio/high_max": 0.0017510913785372395, "clip_ratio/high_mean": 0.0005578294021688635, "clip_ratio/low_mean": 0.0004051493924634997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009629787855374161, "completions/clipped_ratio": 0.0479910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3783.0, "completions/mean_length": 696.9955444335938, "completions/mean_terminated_length": 525.650634765625, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 6.0839650145772595, "grad_norm": 0.24923075735569, "learning_rate": 1e-06, "loss": -0.0213, "num_tokens": 372303115.0, "reward": 0.6517857313156128, "reward_std": 0.12223109602928162, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 651 }, { "clip_ratio/high_max": 0.0016269245134026278, "clip_ratio/high_mean": 0.0004937498461003997, "clip_ratio/low_mean": 0.0005023160556447692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009960658990166849, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1837.0, "completions/mean_length": 764.6484985351562, "completions/mean_terminated_length": 516.9940185546875, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.093294460641399, "grad_norm": 0.2499609887599945, "learning_rate": 1e-06, "loss": -0.0101, "num_tokens": 372822464.0, "reward": 0.6584821939468384, "reward_std": 0.11637409776449203, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 652 }, { "clip_ratio/high_max": 0.0017889736518554855, "clip_ratio/high_mean": 0.0006591872588614933, "clip_ratio/low_mean": 0.0006058605977159459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012650478711293545, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3414.0, "completions/mean_length": 814.4408569335938, "completions/mean_terminated_length": 544.9407958984375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 6.1026239067055394, "grad_norm": 0.2120325118303299, "learning_rate": 1e-06, "loss": -0.0142, "num_tokens": 373364467.0, "reward": 0.637276828289032, "reward_std": 0.14995622634887695, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 653 }, { "clip_ratio/high_max": 0.0019145760343235452, "clip_ratio/high_mean": 0.000730699280211411, "clip_ratio/low_mean": 0.0006068510438126395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013375503294810187, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 852.5647583007812, "completions/mean_terminated_length": 577.6973876953125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 6.111953352769679, "grad_norm": 0.406982958316803, "learning_rate": 1e-06, "loss": -0.0342, "num_tokens": 373938861.0, "reward": 0.6149553656578064, "reward_std": 0.1619105339050293, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 654 }, { "clip_ratio/high_max": 0.0015829457479412667, "clip_ratio/high_mean": 0.0005346698862922494, "clip_ratio/low_mean": 0.0004069591695952113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009416290286026197, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3615.0, "completions/mean_length": 816.2623291015625, "completions/mean_terminated_length": 516.6516723632812, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 6.121282798833819, "grad_norm": 0.18141846358776093, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 374445176.0, "reward": 0.6272321939468384, "reward_std": 0.12407511472702026, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 655 }, { "clip_ratio/high_max": 0.001749757408106234, "clip_ratio/high_mean": 0.0006996469383011572, "clip_ratio/low_mean": 0.0004230048552926746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00112265181451221, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3431.0, "completions/mean_length": 747.4006958007812, "completions/mean_terminated_length": 528.4078369140625, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 6.130612244897959, "grad_norm": 0.21173200011253357, "learning_rate": 1e-06, "loss": -0.0286, "num_tokens": 374966799.0, "reward": 0.723214328289032, "reward_std": 0.14624707400798798, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 656 }, { "clip_ratio/high_max": 0.0023784947334206663, "clip_ratio/high_mean": 0.0008999665551527869, "clip_ratio/low_mean": 0.00035073289313913847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012506994462455623, "completions/clipped_ratio": 0.0502232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3135.0, "completions/mean_length": 748.3638916015625, "completions/mean_terminated_length": 571.3442993164062, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.139941690962099, "grad_norm": 0.24232593178749084, "learning_rate": 1e-06, "loss": -0.02, "num_tokens": 375538013.0, "reward": 0.6662946939468384, "reward_std": 0.17186996340751648, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 657 }, { "clip_ratio/high_max": 0.0017698585870675743, "clip_ratio/high_mean": 0.0006790151292079827, "clip_ratio/low_mean": 0.0005332792488843552, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012122944062866736, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3974.0, "completions/mean_length": 810.5022583007812, "completions/mean_terminated_length": 536.3796997070312, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.149271137026239, "grad_norm": 0.23561832308769226, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 376075407.0, "reward": 0.629464328289032, "reward_std": 0.15665017068386078, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 658 }, { "clip_ratio/high_max": 0.002033540342381457, "clip_ratio/high_mean": 0.0007511484600399854, "clip_ratio/low_mean": 0.00048375802634836873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012349064963927958, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 765.6217041015625, "completions/mean_terminated_length": 526.5992431640625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 6.158600583090379, "grad_norm": 0.22931915521621704, "learning_rate": 1e-06, "loss": -0.0253, "num_tokens": 376600716.0, "reward": 0.7087053656578064, "reward_std": 0.1317012757062912, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 659 }, { "clip_ratio/high_max": 0.001794919964595465, "clip_ratio/high_mean": 0.0006537741446663858, "clip_ratio/low_mean": 0.0005107600077280949, "clip_ratio/low_min": 1.1952572094742209e-05, "clip_ratio/region_mean": 0.0011645341619441751, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2942.0, "completions/mean_length": 808.6707763671875, "completions/mean_terminated_length": 542.9879150390625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 6.167930029154519, "grad_norm": 0.37244313955307007, "learning_rate": 1e-06, "loss": -0.0245, "num_tokens": 377137397.0, "reward": 0.6194196939468384, "reward_std": 0.15613023936748505, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 660 }, { "clip_ratio/high_max": 0.0016825433958729263, "clip_ratio/high_mean": 0.0006719929642713396, "clip_ratio/low_mean": 0.0004389477489894489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001110940702346852, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 909.794677734375, "completions/mean_terminated_length": 627.1786499023438, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 6.1772594752186585, "grad_norm": 0.38394808769226074, "learning_rate": 1e-06, "loss": -0.019, "num_tokens": 377755845.0, "reward": 0.5546875, "reward_std": 0.16856057941913605, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 661 }, { "clip_ratio/high_max": 0.001949212615727447, "clip_ratio/high_mean": 0.0008322285739268409, "clip_ratio/low_mean": 0.0004455939683793986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012778225900547113, "completions/clipped_ratio": 0.0513392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2791.0, "completions/mean_length": 749.2879638671875, "completions/mean_terminated_length": 568.1717529296875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 6.186588921282799, "grad_norm": 0.22008521854877472, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 378321719.0, "reward": 0.6852678656578064, "reward_std": 0.15958118438720703, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 662 }, { "clip_ratio/high_max": 0.0018381416484771762, "clip_ratio/high_mean": 0.0007279891924554249, "clip_ratio/low_mean": 0.0005336279491530149, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012616171334229875, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3719.0, "completions/mean_length": 797.904052734375, "completions/mean_terminated_length": 527.0458984375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 6.1959183673469385, "grad_norm": 0.22327379882335663, "learning_rate": 1e-06, "loss": -0.0435, "num_tokens": 378851529.0, "reward": 0.6863839626312256, "reward_std": 0.14507634937763214, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 663 }, { "clip_ratio/high_max": 0.0014841456613794435, "clip_ratio/high_mean": 0.0005774707378805033, "clip_ratio/low_mean": 0.00037224599600449437, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009497167211520718, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3343.0, "completions/mean_length": 739.6585083007812, "completions/mean_terminated_length": 549.6768798828125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.205247813411079, "grad_norm": 0.19939115643501282, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 379398783.0, "reward": 0.6495535969734192, "reward_std": 0.11881405115127563, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 664 }, { "clip_ratio/high_max": 0.00168674882115738, "clip_ratio/high_mean": 0.000540895513495343, "clip_ratio/low_mean": 0.00047623256841689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010171280755457701, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2777.0, "completions/mean_length": 837.6451416015625, "completions/mean_terminated_length": 599.609619140625, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.214577259475218, "grad_norm": 0.16969957947731018, "learning_rate": 1e-06, "loss": -0.0165, "num_tokens": 379998337.0, "reward": 0.5323660969734192, "reward_std": 0.12490066885948181, "rewards/verify_math_reward/mean": 0.5323660969734192, "rewards/verify_math_reward/std": 0.4992299973964691, "step": 665 }, { "clip_ratio/high_max": 0.0017531042649352457, "clip_ratio/high_mean": 0.0006934321909284336, "clip_ratio/low_mean": 0.0002739244739586866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009673566437413683, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2126.0, "completions/mean_length": 744.1016235351562, "completions/mean_terminated_length": 507.8267517089844, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 6.223906705539359, "grad_norm": 0.24664264917373657, "learning_rate": 1e-06, "loss": -0.0166, "num_tokens": 380501764.0, "reward": 0.699776828289032, "reward_std": 0.1265924572944641, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 666 }, { "clip_ratio/high_max": 0.0015495972147618886, "clip_ratio/high_mean": 0.0006333369365165709, "clip_ratio/low_mean": 0.00040289902472068206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010362359498685692, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3186.0, "completions/mean_length": 862.904052734375, "completions/mean_terminated_length": 593.153564453125, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.233236151603498, "grad_norm": 0.28692084550857544, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 381074422.0, "reward": 0.6462053656578064, "reward_std": 0.15195390582084656, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 667 }, { "clip_ratio/high_max": 0.0021219025147729553, "clip_ratio/high_mean": 0.0007889531007094774, "clip_ratio/low_mean": 0.0006719961311318912, "clip_ratio/low_min": 2.1777002984890714e-05, "clip_ratio/region_mean": 0.001460949260945199, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3754.0, "completions/mean_length": 876.638427734375, "completions/mean_terminated_length": 561.0147094726562, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.242565597667639, "grad_norm": 0.2616720497608185, "learning_rate": 1e-06, "loss": -0.0279, "num_tokens": 381627994.0, "reward": 0.6171875, "reward_std": 0.1624757945537567, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 668 }, { "clip_ratio/high_max": 0.0017941140267794253, "clip_ratio/high_mean": 0.0005700217761841486, "clip_ratio/low_mean": 0.0004883920100837713, "clip_ratio/low_min": 2.200704147981014e-05, "clip_ratio/region_mean": 0.0010584137926343828, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3982.0, "completions/mean_length": 797.9699096679688, "completions/mean_terminated_length": 565.4921875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 6.251895043731778, "grad_norm": 0.177029550075531, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 382192663.0, "reward": 0.5993303656578064, "reward_std": 0.12944427132606506, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 669 }, { "clip_ratio/high_max": 0.0021771472274849657, "clip_ratio/high_mean": 0.0008874079157976666, "clip_ratio/low_mean": 0.00047173102757369634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013591390052170027, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2376.0, "completions/mean_length": 791.4330444335938, "completions/mean_terminated_length": 511.385009765625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.261224489795918, "grad_norm": 0.23426395654678345, "learning_rate": 1e-06, "loss": -0.048, "num_tokens": 382705299.0, "reward": 0.6908482313156128, "reward_std": 0.13879750669002533, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 670 }, { "clip_ratio/high_max": 0.0020564780843415065, "clip_ratio/high_mean": 0.0008107266194201657, "clip_ratio/low_mean": 0.0006503005106424098, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014610271318815649, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3896.0, "completions/mean_length": 821.0424194335938, "completions/mean_terminated_length": 534.8810424804688, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 6.270553935860058, "grad_norm": 0.2185281217098236, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 383235081.0, "reward": 0.6674107313156128, "reward_std": 0.15372419357299805, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 671 }, { "clip_ratio/high_max": 0.0018346465112699661, "clip_ratio/high_mean": 0.0006521518334920984, "clip_ratio/low_mean": 0.0004316251993259357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010837770096259192, "completions/clipped_ratio": 0.0546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2778.0, "completions/mean_length": 763.9051513671875, "completions/mean_terminated_length": 571.1392822265625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.279883381924198, "grad_norm": 0.23892897367477417, "learning_rate": 1e-06, "loss": -0.0213, "num_tokens": 383813140.0, "reward": 0.6908482313156128, "reward_std": 0.14620429277420044, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 672 }, { "clip_ratio/high_max": 0.0018636267450347077, "clip_ratio/high_mean": 0.0006842506536486326, "clip_ratio/low_mean": 0.0004896163745797821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011738670073100366, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3406.0, "completions/mean_length": 768.0904541015625, "completions/mean_terminated_length": 537.7577514648438, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.289212827988338, "grad_norm": 0.1821121871471405, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 384363509.0, "reward": 0.6238839626312256, "reward_std": 0.13853536546230316, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 673 }, { "clip_ratio/high_max": 0.0016410047792305704, "clip_ratio/high_mean": 0.0007759028194413986, "clip_ratio/low_mean": 0.0006135401199571788, "clip_ratio/low_min": 1.314405926677864e-05, "clip_ratio/region_mean": 0.0013894428993808106, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3348.0, "completions/mean_length": 765.5000610351562, "completions/mean_terminated_length": 560.3033447265625, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 6.298542274052478, "grad_norm": 0.22997897863388062, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 384923245.0, "reward": 0.6618303656578064, "reward_std": 0.1810377687215805, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 674 }, { "clip_ratio/high_max": 0.0017424870384274982, "clip_ratio/high_mean": 0.0005803296608064556, "clip_ratio/low_mean": 0.00042845564257731894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010087852824653964, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3366.0, "completions/mean_length": 846.7076416015625, "completions/mean_terminated_length": 554.1921997070312, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 6.307871720116618, "grad_norm": 0.1782207190990448, "learning_rate": 1e-06, "loss": -0.0547, "num_tokens": 385454535.0, "reward": 0.6428571939468384, "reward_std": 0.1400102972984314, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 675 }, { "clip_ratio/high_max": 0.0019060583326790947, "clip_ratio/high_mean": 0.0007721769043200766, "clip_ratio/low_mean": 0.0004835599893340259, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012557368900161237, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 880.3482666015625, "completions/mean_terminated_length": 578.02197265625, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 6.317201166180758, "grad_norm": 0.19786468148231506, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 386015039.0, "reward": 0.6316964626312256, "reward_std": 0.16311588883399963, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 676 }, { "clip_ratio/high_max": 0.0018042715710180346, "clip_ratio/high_mean": 0.0006700275789626176, "clip_ratio/low_mean": 0.00042331810800533276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010933456505881622, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2396.0, "completions/mean_length": 837.7689819335938, "completions/mean_terminated_length": 527.0819091796875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 6.326530612244898, "grad_norm": 0.23652194440364838, "learning_rate": 1e-06, "loss": -0.0206, "num_tokens": 386537552.0, "reward": 0.6473214626312256, "reward_std": 0.14015227556228638, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 677 }, { "clip_ratio/high_max": 0.0014771329642826458, "clip_ratio/high_mean": 0.0004302015258872416, "clip_ratio/low_mean": 0.000347447462445416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007776490001560887, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3341.0, "completions/mean_length": 781.2232666015625, "completions/mean_terminated_length": 539.0658569335938, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 6.335860058309038, "grad_norm": 0.17091988027095795, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 387090720.0, "reward": 0.6573660969734192, "reward_std": 0.10926970839500427, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 678 }, { "clip_ratio/high_max": 0.0018406154558761045, "clip_ratio/high_mean": 0.0007871619691286469, "clip_ratio/low_mean": 0.0005521555058294325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001339317470410606, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3343.0, "completions/mean_length": 776.7266235351562, "completions/mean_terminated_length": 538.5011596679688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 6.345189504373177, "grad_norm": 0.24148808419704437, "learning_rate": 1e-06, "loss": -0.0425, "num_tokens": 387629403.0, "reward": 0.7042410969734192, "reward_std": 0.16075189411640167, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 679 }, { "clip_ratio/high_max": 0.0016842037912283558, "clip_ratio/high_mean": 0.0006951858504180564, "clip_ratio/low_mean": 0.00044807020867665415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001143256053182995, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3784.0, "completions/mean_length": 843.3795166015625, "completions/mean_terminated_length": 593.1779174804688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 6.354518950437318, "grad_norm": 0.31907927989959717, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 388208791.0, "reward": 0.6640625, "reward_std": 0.15901413559913635, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 680 }, { "clip_ratio/high_max": 0.0017580458552401979, "clip_ratio/high_mean": 0.0006089514367886295, "clip_ratio/low_mean": 0.0005764675233876915, "clip_ratio/low_min": 3.156007096549729e-05, "clip_ratio/region_mean": 0.0011854189324367326, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2567.0, "completions/mean_length": 930.5803833007812, "completions/mean_terminated_length": 577.12158203125, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 6.363848396501457, "grad_norm": 0.22167593240737915, "learning_rate": 1e-06, "loss": -0.0355, "num_tokens": 388764751.0, "reward": 0.5691964626312256, "reward_std": 0.15255165100097656, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 681 }, { "clip_ratio/high_max": 0.0015328846602642443, "clip_ratio/high_mean": 0.000523390966009174, "clip_ratio/low_mean": 0.0005523648501366551, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010757558084151242, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3860.0, "completions/mean_length": 861.0000610351562, "completions/mean_terminated_length": 561.1707153320312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 6.373177842565598, "grad_norm": 0.20637856423854828, "learning_rate": 1e-06, "loss": -0.0227, "num_tokens": 389320255.0, "reward": 0.621651828289032, "reward_std": 0.1396312266588211, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.485245943069458, "step": 682 }, { "clip_ratio/high_max": 0.0023740889264445286, "clip_ratio/high_mean": 0.0007973391748237191, "clip_ratio/low_mean": 0.0005626817628581193, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001360020920401439, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 721.8158569335938, "completions/mean_terminated_length": 530.8242797851562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.382507288629737, "grad_norm": 0.23185193538665771, "learning_rate": 1e-06, "loss": -0.0247, "num_tokens": 389862994.0, "reward": 0.723214328289032, "reward_std": 0.1592869609594345, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 683 }, { "clip_ratio/high_max": 0.0017013444885378703, "clip_ratio/high_mean": 0.0006676474968116963, "clip_ratio/low_mean": 0.0006344834073388483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001302130905969534, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3117.0, "completions/mean_length": 852.130615234375, "completions/mean_terminated_length": 560.1033935546875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 6.391836734693878, "grad_norm": 0.2287149280309677, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 390413719.0, "reward": 0.5926339626312256, "reward_std": 0.17532162368297577, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 684 }, { "clip_ratio/high_max": 0.0020252322574378923, "clip_ratio/high_mean": 0.000795369150409897, "clip_ratio/low_mean": 0.00041700781093823025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012123769738536794, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 748.2957763671875, "completions/mean_terminated_length": 516.5930786132812, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 6.401166180758017, "grad_norm": 0.27178215980529785, "learning_rate": 1e-06, "loss": -0.0175, "num_tokens": 390938272.0, "reward": 0.7299107313156128, "reward_std": 0.15105310082435608, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 685 }, { "clip_ratio/high_max": 0.0016065499585238285, "clip_ratio/high_mean": 0.0005255454384496261, "clip_ratio/low_mean": 0.0005874646922165994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011130101411254145, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2741.0, "completions/mean_length": 877.1719360351562, "completions/mean_terminated_length": 517.7493896484375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 6.410495626822158, "grad_norm": 0.18121960759162903, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 391443082.0, "reward": 0.6428571939468384, "reward_std": 0.12388080358505249, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 686 }, { "clip_ratio/high_max": 0.0022832265167380683, "clip_ratio/high_mean": 0.0007447617481375346, "clip_ratio/low_mean": 0.000574076830616832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013188385964895133, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 843.0324096679688, "completions/mean_terminated_length": 571.6239624023438, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 6.419825072886297, "grad_norm": 7.517394065856934, "learning_rate": 1e-06, "loss": 0.0018, "num_tokens": 392000247.0, "reward": 0.6116071939468384, "reward_std": 0.164948508143425, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 687 }, { "clip_ratio/high_max": 0.0017661859492363874, "clip_ratio/high_mean": 0.000780365189712029, "clip_ratio/low_mean": 0.00047947918574209325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012598443645401858, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2984.0, "completions/mean_length": 910.2042846679688, "completions/mean_terminated_length": 571.9592895507812, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.429154518950437, "grad_norm": 0.2341417372226715, "learning_rate": 1e-06, "loss": -0.0502, "num_tokens": 392551086.0, "reward": 0.6261160969734192, "reward_std": 0.1670539826154709, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 688 }, { "clip_ratio/high_max": 0.001945983760379022, "clip_ratio/high_mean": 0.0006622571581829106, "clip_ratio/low_mean": 0.0005116532493047998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011739104083972052, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3404.0, "completions/mean_length": 873.4085083007812, "completions/mean_terminated_length": 570.4298095703125, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.438483965014577, "grad_norm": 0.2397717386484146, "learning_rate": 1e-06, "loss": -0.0167, "num_tokens": 393109132.0, "reward": 0.5959821939468384, "reward_std": 0.1514211744070053, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 689 }, { "clip_ratio/high_max": 0.0021740635274909437, "clip_ratio/high_mean": 0.0009290729904023465, "clip_ratio/low_mean": 0.0006227267604117515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015517997162532993, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 842.5881958007812, "completions/mean_terminated_length": 571.1427001953125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 6.447813411078717, "grad_norm": 0.25231674313545227, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 393666299.0, "reward": 0.660714328289032, "reward_std": 0.1946806162595749, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 690 }, { "clip_ratio/high_max": 0.0023352392454398796, "clip_ratio/high_mean": 0.0008996025208034553, "clip_ratio/low_mean": 0.000517100941578974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014167034496495035, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3534.0, "completions/mean_length": 905.6328735351562, "completions/mean_terminated_length": 575.5947875976562, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 6.457142857142857, "grad_norm": 0.21373829245567322, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 394222154.0, "reward": 0.6428571939468384, "reward_std": 0.184951514005661, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 691 }, { "clip_ratio/high_max": 0.0014546386701113079, "clip_ratio/high_mean": 0.0005665116441377904, "clip_ratio/low_mean": 0.00039359544462058693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009601070814824197, "completions/clipped_ratio": 0.0435267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3667.0, "completions/mean_length": 709.2332763671875, "completions/mean_terminated_length": 555.1096801757812, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 6.466472303206997, "grad_norm": 0.2313837856054306, "learning_rate": 1e-06, "loss": -0.0087, "num_tokens": 394788171.0, "reward": 0.6674107313156128, "reward_std": 0.1511615812778473, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140389680862427, "step": 692 }, { "clip_ratio/high_max": 0.001757234273100039, "clip_ratio/high_mean": 0.0007398223715426866, "clip_ratio/low_mean": 0.00039782238400221104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011376447582733817, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 845.5938110351562, "completions/mean_terminated_length": 531.2949829101562, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 6.475801749271137, "grad_norm": 0.21056608855724335, "learning_rate": 1e-06, "loss": -0.0507, "num_tokens": 395304631.0, "reward": 0.652901828289032, "reward_std": 0.15300630033016205, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 693 }, { "clip_ratio/high_max": 0.0017511395672045182, "clip_ratio/high_mean": 0.0006473053281297325, "clip_ratio/low_mean": 0.00040133827769750496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010486436185601633, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3572.0, "completions/mean_length": 753.8002319335938, "completions/mean_terminated_length": 509.6395263671875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 6.485131195335277, "grad_norm": 0.2462763786315918, "learning_rate": 1e-06, "loss": -0.0178, "num_tokens": 395821748.0, "reward": 0.6696428656578064, "reward_std": 0.12783098220825195, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 694 }, { "clip_ratio/high_max": 0.0021231266655377112, "clip_ratio/high_mean": 0.0008790855936240405, "clip_ratio/low_mean": 0.00041230998681385245, "clip_ratio/low_min": 2.891510484914761e-05, "clip_ratio/region_mean": 0.0012913956124975812, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 828.591552734375, "completions/mean_terminated_length": 530.1072387695312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 6.494460641399417, "grad_norm": 0.4874882400035858, "learning_rate": 1e-06, "loss": -0.0521, "num_tokens": 396344326.0, "reward": 0.6573660969734192, "reward_std": 0.16101041436195374, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 695 }, { "clip_ratio/high_max": 0.0015126053513085935, "clip_ratio/high_mean": 0.0004811641765627428, "clip_ratio/low_mean": 0.0004651214394471026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009462856378377182, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3821.0, "completions/mean_length": 837.0045166015625, "completions/mean_terminated_length": 577.8554077148438, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 6.503790087463557, "grad_norm": 0.2033204585313797, "learning_rate": 1e-06, "loss": -0.0074, "num_tokens": 396917538.0, "reward": 0.6116071939468384, "reward_std": 0.13650770485401154, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 696 }, { "clip_ratio/high_max": 0.0021814837818965316, "clip_ratio/high_mean": 0.0007460377855750266, "clip_ratio/low_mean": 0.00048013355899456656, "clip_ratio/low_min": 1.0751763511507306e-05, "clip_ratio/region_mean": 0.0012261713527550455, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3853.0, "completions/mean_length": 950.0234985351562, "completions/mean_terminated_length": 576.9050903320312, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 6.513119533527696, "grad_norm": 0.2154729664325714, "learning_rate": 1e-06, "loss": -0.0574, "num_tokens": 397476311.0, "reward": 0.6383928656578064, "reward_std": 0.15751849114894867, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 697 }, { "clip_ratio/high_max": 0.002022316286456771, "clip_ratio/high_mean": 0.0006916968668519985, "clip_ratio/low_mean": 0.00048438541489304043, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011760822853830177, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 862.8370971679688, "completions/mean_terminated_length": 550.2056274414062, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.522448979591837, "grad_norm": 0.23432853817939758, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 398025581.0, "reward": 0.6328125, "reward_std": 0.15285545587539673, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 698 }, { "clip_ratio/high_max": 0.00194493422168307, "clip_ratio/high_mean": 0.0007136685198929626, "clip_ratio/low_mean": 0.0004591452625390957, "clip_ratio/low_min": 2.2986392650636844e-05, "clip_ratio/region_mean": 0.001172813746961765, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 825.2131958007812, "completions/mean_terminated_length": 508.9436950683594, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 6.531778425655976, "grad_norm": 0.21726079285144806, "learning_rate": 1e-06, "loss": -0.038, "num_tokens": 398531116.0, "reward": 0.7098214626312256, "reward_std": 0.138991117477417, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 699 }, { "clip_ratio/high_max": 0.0018781147518893704, "clip_ratio/high_mean": 0.0007367989092017524, "clip_ratio/low_mean": 0.00044935276400792645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011861516613862477, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 766.6295166015625, "completions/mean_terminated_length": 519.122314453125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.541107871720117, "grad_norm": 0.22654405236244202, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 399044248.0, "reward": 0.6729910969734192, "reward_std": 0.1536468118429184, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 700 }, { "clip_ratio/high_max": 0.0016621490285615437, "clip_ratio/high_mean": 0.0005753648920290289, "clip_ratio/low_mean": 0.0003774890003569453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009528538903396111, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3531.0, "completions/mean_length": 872.8516235351562, "completions/mean_terminated_length": 582.6897583007812, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 6.550437317784256, "grad_norm": 0.19929608702659607, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 399608355.0, "reward": 0.6417410969734192, "reward_std": 0.13324108719825745, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 701 }, { "clip_ratio/high_max": 0.0019489681944833137, "clip_ratio/high_mean": 0.0007136997555790003, "clip_ratio/low_mean": 0.00040497460850019706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011186743431608193, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 777.1819458007812, "completions/mean_terminated_length": 534.7293701171875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 6.559766763848397, "grad_norm": 0.20507614314556122, "learning_rate": 1e-06, "loss": -0.0297, "num_tokens": 400139158.0, "reward": 0.6484375, "reward_std": 0.12020343542098999, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 702 }, { "clip_ratio/high_max": 0.0018310301566089038, "clip_ratio/high_mean": 0.0006095708095017471, "clip_ratio/low_mean": 0.0005028325331295491, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011124033262603916, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2436.0, "completions/mean_length": 984.6763916015625, "completions/mean_terminated_length": 584.98486328125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 6.569096209912536, "grad_norm": 0.801872730255127, "learning_rate": 1e-06, "loss": -0.052, "num_tokens": 400679980.0, "reward": 0.5814732313156128, "reward_std": 0.146018847823143, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 703 }, { "clip_ratio/high_max": 0.002049558133876417, "clip_ratio/high_mean": 0.0007897551877249498, "clip_ratio/low_mean": 0.00045285856003829394, "clip_ratio/low_min": 1.3199577551858965e-05, "clip_ratio/region_mean": 0.0012426137327565812, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3632.0, "completions/mean_length": 737.536865234375, "completions/mean_terminated_length": 547.4351196289062, "completions/min_length": 181.0, "completions/min_terminated_length": 181.0, "epoch": 6.578425655976677, "grad_norm": 0.22027653455734253, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 401229757.0, "reward": 0.6674107313156128, "reward_std": 0.17551273107528687, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 704 }, { "clip_ratio/high_max": 0.002380739701038692, "clip_ratio/high_mean": 0.0009879878580250079, "clip_ratio/low_mean": 0.0004997300620743772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014877178909955546, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2423.0, "completions/mean_length": 838.864990234375, "completions/mean_terminated_length": 592.5269775390625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 6.587755102040816, "grad_norm": 0.22483842074871063, "learning_rate": 1e-06, "loss": -0.0247, "num_tokens": 401804492.0, "reward": 0.6651785969734192, "reward_std": 0.18280190229415894, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 705 }, { "clip_ratio/high_max": 0.0015944882870826405, "clip_ratio/high_mean": 0.0006383999811987451, "clip_ratio/low_mean": 0.00037323112701415084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010116311332240002, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2502.0, "completions/mean_length": 858.6283569335938, "completions/mean_terminated_length": 571.473876953125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 6.597084548104956, "grad_norm": 0.2551387548446655, "learning_rate": 1e-06, "loss": -0.0244, "num_tokens": 402367871.0, "reward": 0.6328125, "reward_std": 0.13982413709163666, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 706 }, { "clip_ratio/high_max": 0.0014203772880136967, "clip_ratio/high_mean": 0.0004773060836669174, "clip_ratio/low_mean": 0.00036724474284710595, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000844550824695034, "completions/clipped_ratio": 0.0591517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3039.0, "completions/mean_length": 748.7767944335938, "completions/mean_terminated_length": 538.3345336914062, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 6.606413994169096, "grad_norm": 0.2324991375207901, "learning_rate": 1e-06, "loss": -0.0215, "num_tokens": 402913079.0, "reward": 0.6417410969734192, "reward_std": 0.12756815552711487, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 707 }, { "clip_ratio/high_max": 0.0019358624704182148, "clip_ratio/high_mean": 0.000905928885913454, "clip_ratio/low_mean": 0.00046523944092768943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013711683350265957, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3393.0, "completions/mean_length": 777.9832763671875, "completions/mean_terminated_length": 531.3201293945312, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 6.615743440233236, "grad_norm": 0.29323431849479675, "learning_rate": 1e-06, "loss": -0.0507, "num_tokens": 403446328.0, "reward": 0.6886160969734192, "reward_std": 0.1590908318758011, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 708 }, { "clip_ratio/high_max": 0.0020780169870704412, "clip_ratio/high_mean": 0.0006810736704210285, "clip_ratio/low_mean": 0.00048153315719901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011626068117038812, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3561.0, "completions/mean_length": 816.2455444335938, "completions/mean_terminated_length": 542.6021728515625, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 6.625072886297376, "grad_norm": 0.22786401212215424, "learning_rate": 1e-06, "loss": -0.0366, "num_tokens": 403984356.0, "reward": 0.6540178656578064, "reward_std": 0.15537026524543762, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 709 }, { "clip_ratio/high_max": 0.0019382916543690953, "clip_ratio/high_mean": 0.0007331270335271256, "clip_ratio/low_mean": 0.0002815239149640547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010146509393962333, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3715.0, "completions/mean_length": 852.4096069335938, "completions/mean_terminated_length": 547.4566650390625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.634402332361516, "grad_norm": 45.36116027832031, "learning_rate": 1e-06, "loss": 0.0086, "num_tokens": 404532515.0, "reward": 0.6584821939468384, "reward_std": 0.14391450583934784, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 710 }, { "clip_ratio/high_max": 0.0013017620076425374, "clip_ratio/high_mean": 0.0004013598336314317, "clip_ratio/low_mean": 0.00034816852894437034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000749528353480855, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3106.0, "completions/mean_length": 813.5201416015625, "completions/mean_terminated_length": 565.2652587890625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 6.643731778425656, "grad_norm": 0.19472123682498932, "learning_rate": 1e-06, "loss": -0.0259, "num_tokens": 405087237.0, "reward": 0.6473214626312256, "reward_std": 0.11445339024066925, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 711 }, { "clip_ratio/high_max": 0.0016469336114823818, "clip_ratio/high_mean": 0.0005770206389570376, "clip_ratio/low_mean": 0.0005242761690169573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001101296813430963, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3273.0, "completions/mean_length": 921.318115234375, "completions/mean_terminated_length": 540.3562622070312, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 6.653061224489796, "grad_norm": 1109248256.0, "learning_rate": 1e-06, "loss": 30502.4707, "num_tokens": 405615890.0, "reward": 0.6049107313156128, "reward_std": 0.1540568619966507, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 712 }, { "clip_ratio/high_max": 0.0017509955614514183, "clip_ratio/high_mean": 0.0005239955335127888, "clip_ratio/low_mean": 0.0002240728153992677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007480683543690247, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3967.0, "completions/mean_length": 770.5814819335938, "completions/mean_terminated_length": 548.8869018554688, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 6.662390670553936, "grad_norm": 0.17097845673561096, "learning_rate": 1e-06, "loss": -0.0336, "num_tokens": 406167971.0, "reward": 0.6752232313156128, "reward_std": 0.09818372875452042, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 713 }, { "clip_ratio/high_max": 0.0017584624647497549, "clip_ratio/high_mean": 0.0005277417051274824, "clip_ratio/low_mean": 0.0004908358516786393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010185775645368267, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4041.0, "completions/mean_length": 852.0379638671875, "completions/mean_terminated_length": 577.1259155273438, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.671720116618076, "grad_norm": 0.16864407062530518, "learning_rate": 1e-06, "loss": -0.0298, "num_tokens": 406743789.0, "reward": 0.613839328289032, "reward_std": 0.11629742383956909, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 714 }, { "clip_ratio/high_max": 0.0020065759199496824, "clip_ratio/high_mean": 0.0006850403196949628, "clip_ratio/low_mean": 0.00035257973286206834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010376200516475365, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3452.0, "completions/mean_length": 905.8605346679688, "completions/mean_terminated_length": 527.5043334960938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 6.681049562682215, "grad_norm": 0.2451452910900116, "learning_rate": 1e-06, "loss": -0.0423, "num_tokens": 407257056.0, "reward": 0.6573660969734192, "reward_std": 0.1242266595363617, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 715 }, { "clip_ratio/high_max": 0.0021253882805467583, "clip_ratio/high_mean": 0.0008916977321860031, "clip_ratio/low_mean": 0.0005053585273344652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001397056257701479, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3484.0, "completions/mean_length": 865.935302734375, "completions/mean_terminated_length": 566.5634155273438, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 6.690379008746356, "grad_norm": 0.21814365684986115, "learning_rate": 1e-06, "loss": -0.0215, "num_tokens": 407812390.0, "reward": 0.5970982313156128, "reward_std": 0.17130474746227264, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 716 }, { "clip_ratio/high_max": 0.0018165794099331833, "clip_ratio/high_mean": 0.0007312041234399658, "clip_ratio/low_mean": 0.0004184676008662791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011496717233967502, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3870.0, "completions/mean_length": 937.302490234375, "completions/mean_terminated_length": 593.285888671875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 6.699708454810495, "grad_norm": 0.204402357339859, "learning_rate": 1e-06, "loss": -0.0472, "num_tokens": 408380773.0, "reward": 0.621651828289032, "reward_std": 0.1486877053976059, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 717 }, { "clip_ratio/high_max": 0.0015623804483766435, "clip_ratio/high_mean": 0.0006160191819617467, "clip_ratio/low_mean": 0.0004172031394773512, "clip_ratio/low_min": 1.2077294741175137e-05, "clip_ratio/region_mean": 0.0010332223173463717, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3620.0, "completions/mean_length": 801.1986694335938, "completions/mean_terminated_length": 552.011962890625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.709037900874636, "grad_norm": 0.2643035352230072, "learning_rate": 1e-06, "loss": -0.0184, "num_tokens": 408924919.0, "reward": 0.6796875596046448, "reward_std": 0.13771051168441772, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 718 }, { "clip_ratio/high_max": 0.0015866108260524925, "clip_ratio/high_mean": 0.0006346721202135086, "clip_ratio/low_mean": 0.00043240988088655286, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010670819756342098, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2123.0, "completions/mean_length": 771.7522583007812, "completions/mean_terminated_length": 545.909423828125, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 6.718367346938775, "grad_norm": 1.0090049505233765, "learning_rate": 1e-06, "loss": -0.0179, "num_tokens": 409481817.0, "reward": 0.6696428656578064, "reward_std": 0.1456729769706726, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 719 }, { "clip_ratio/high_max": 0.0019794810286839493, "clip_ratio/high_mean": 0.0007128198585633072, "clip_ratio/low_mean": 0.0005413898770711967, "clip_ratio/low_min": 1.2902560229122173e-05, "clip_ratio/region_mean": 0.0012542097392724827, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3631.0, "completions/mean_length": 856.7020263671875, "completions/mean_terminated_length": 556.474365234375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 6.727696793002916, "grad_norm": 0.2689896523952484, "learning_rate": 1e-06, "loss": -0.0201, "num_tokens": 410027046.0, "reward": 0.6183035969734192, "reward_std": 0.15315786004066467, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 720 }, { "clip_ratio/high_max": 0.0017890060844365507, "clip_ratio/high_mean": 0.0006061323074391112, "clip_ratio/low_mean": 0.0003104347533735563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009165670671791304, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3616.0, "completions/mean_length": 804.7042846679688, "completions/mean_terminated_length": 551.5276489257812, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 6.737026239067055, "grad_norm": 0.20530693233013153, "learning_rate": 1e-06, "loss": -0.0207, "num_tokens": 410566837.0, "reward": 0.6774553656578064, "reward_std": 0.12767691910266876, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 721 }, { "clip_ratio/high_max": 0.0017850015283329412, "clip_ratio/high_mean": 0.0006699838495478616, "clip_ratio/low_mean": 0.00028910670334880706, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009590905501681846, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3791.0, "completions/mean_length": 813.3080444335938, "completions/mean_terminated_length": 526.4708862304688, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.746355685131196, "grad_norm": 0.2605624794960022, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 411103425.0, "reward": 0.637276828289032, "reward_std": 0.12760023772716522, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 722 }, { "clip_ratio/high_max": 0.0013954752721474506, "clip_ratio/high_mean": 0.00039860400420366204, "clip_ratio/low_mean": 0.00026434793812768476, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006629519339185208, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2090.0, "completions/mean_length": 752.2533569335938, "completions/mean_terminated_length": 520.8245849609375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 6.755685131195335, "grad_norm": 0.9102131128311157, "learning_rate": 1e-06, "loss": -0.0267, "num_tokens": 411627812.0, "reward": 0.625, "reward_std": 0.09119697660207748, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 723 }, { "clip_ratio/high_max": 0.0020355298402137123, "clip_ratio/high_mean": 0.0007807658621459268, "clip_ratio/low_mean": 0.0005408401111708372, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013216059505793964, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1668.0, "completions/mean_length": 734.8392944335938, "completions/mean_terminated_length": 527.7536010742188, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 6.765014577259475, "grad_norm": 0.2034630924463272, "learning_rate": 1e-06, "loss": -0.0549, "num_tokens": 412154268.0, "reward": 0.6897321939468384, "reward_std": 0.14902472496032715, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 724 }, { "clip_ratio/high_max": 0.001617327634448884, "clip_ratio/high_mean": 0.0005615910349661135, "clip_ratio/low_mean": 0.0002966302085951611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008582212394685484, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3917.0, "completions/mean_length": 881.1027221679688, "completions/mean_terminated_length": 574.5477294921875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 6.774344023323615, "grad_norm": 0.2027897983789444, "learning_rate": 1e-06, "loss": -0.0287, "num_tokens": 412709896.0, "reward": 0.6116071939468384, "reward_std": 0.13113674521446228, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 725 }, { "clip_ratio/high_max": 0.002235222033050377, "clip_ratio/high_mean": 0.000967257887168671, "clip_ratio/low_mean": 0.0004345703191575012, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014018282199685927, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2156.0, "completions/mean_length": 845.0111694335938, "completions/mean_terminated_length": 539.3626708984375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.783673469387755, "grad_norm": 1.8573672771453857, "learning_rate": 1e-06, "loss": -0.0735, "num_tokens": 413239082.0, "reward": 0.6696428656578064, "reward_std": 0.17574027180671692, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 726 }, { "clip_ratio/high_max": 0.0015628930868842872, "clip_ratio/high_mean": 0.0005348108979887911, "clip_ratio/low_mean": 0.00035897696534448187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008937878665165044, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2173.0, "completions/mean_length": 875.8471069335938, "completions/mean_terminated_length": 547.098388671875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.793002915451895, "grad_norm": 0.19924987852573395, "learning_rate": 1e-06, "loss": -0.0646, "num_tokens": 413774273.0, "reward": 0.5970982313156128, "reward_std": 0.13835102319717407, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 727 }, { "clip_ratio/high_max": 0.0018707213748712093, "clip_ratio/high_mean": 0.0007416300595650682, "clip_ratio/low_mean": 0.00043128803736181, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011729180878319312, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3907.0, "completions/mean_length": 906.9732666015625, "completions/mean_terminated_length": 572.73486328125, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 6.802332361516035, "grad_norm": 0.21239390969276428, "learning_rate": 1e-06, "loss": -0.065, "num_tokens": 414322769.0, "reward": 0.5859375, "reward_std": 0.1555984914302826, "rewards/verify_math_reward/mean": 0.5859375, "rewards/verify_math_reward/std": 0.4928344786167145, "step": 728 }, { "clip_ratio/high_max": 0.0017851747797976714, "clip_ratio/high_mean": 0.0007487396505894139, "clip_ratio/low_mean": 0.0004084533720742911, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011571930153877474, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 847.0301513671875, "completions/mean_terminated_length": 567.4218139648438, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 6.811661807580175, "grad_norm": 0.40516436100006104, "learning_rate": 1e-06, "loss": -0.0535, "num_tokens": 414872268.0, "reward": 0.6752232313156128, "reward_std": 0.16096945106983185, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 729 }, { "clip_ratio/high_max": 0.0018265884900756646, "clip_ratio/high_mean": 0.0005265837353363167, "clip_ratio/low_mean": 0.0005540081328945234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010805918573169038, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3536.0, "completions/mean_length": 879.1038208007812, "completions/mean_terminated_length": 550.6875610351562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.820991253644315, "grad_norm": 0.25719979405403137, "learning_rate": 1e-06, "loss": -0.0195, "num_tokens": 415396513.0, "reward": 0.5680803656578064, "reward_std": 0.13752618432044983, "rewards/verify_math_reward/mean": 0.5680803656578064, "rewards/verify_math_reward/std": 0.4956200420856476, "step": 730 }, { "clip_ratio/high_max": 0.001776021550540463, "clip_ratio/high_mean": 0.0006765694761270424, "clip_ratio/low_mean": 0.0003450459280429641, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010216153896180913, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2321.0, "completions/mean_length": 939.0480346679688, "completions/mean_terminated_length": 546.9046630859375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 6.830320699708455, "grad_norm": 0.25183525681495667, "learning_rate": 1e-06, "loss": -0.0372, "num_tokens": 415921196.0, "reward": 0.5993303656578064, "reward_std": 0.14158260822296143, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 731 }, { "clip_ratio/high_max": 0.001664623723627301, "clip_ratio/high_mean": 0.0005984655344946077, "clip_ratio/low_mean": 0.0004878929175902158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010863584830076434, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 886.1920166015625, "completions/mean_terminated_length": 584.4151611328125, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 6.839650145772595, "grad_norm": 0.22967371344566345, "learning_rate": 1e-06, "loss": -0.035, "num_tokens": 416490416.0, "reward": 0.6361607313156128, "reward_std": 0.1456383466720581, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 732 }, { "clip_ratio/high_max": 0.002098257900797762, "clip_ratio/high_mean": 0.0007389553775283275, "clip_ratio/low_mean": 0.00045374384535534773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011926992301596329, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3985.0, "completions/mean_length": 851.9185791015625, "completions/mean_terminated_length": 546.91943359375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 6.848979591836734, "grad_norm": 0.2931455075740814, "learning_rate": 1e-06, "loss": -0.0326, "num_tokens": 417039431.0, "reward": 0.6395089626312256, "reward_std": 0.1689721643924713, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 733 }, { "clip_ratio/high_max": 0.00213176980469143, "clip_ratio/high_mean": 0.0006978256369620794, "clip_ratio/low_mean": 0.00043826582168549066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011360914322722238, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 897.9397583007812, "completions/mean_terminated_length": 536.4198608398438, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 6.858309037900875, "grad_norm": 0.21264854073524475, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 417559961.0, "reward": 0.6227678656578064, "reward_std": 0.143612802028656, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 734 }, { "clip_ratio/high_max": 0.0015545234564342536, "clip_ratio/high_mean": 0.0006458678044509725, "clip_ratio/low_mean": 0.0004670431944759912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001112910973461112, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2442.0, "completions/mean_length": 928.6719360351562, "completions/mean_terminated_length": 539.7017211914062, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 6.867638483965014, "grad_norm": 0.2152799367904663, "learning_rate": 1e-06, "loss": -0.0297, "num_tokens": 418090427.0, "reward": 0.6104910969734192, "reward_std": 0.13027937710285187, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 735 }, { "clip_ratio/high_max": 0.0021052377014711965, "clip_ratio/high_mean": 0.0008278625191451283, "clip_ratio/low_mean": 0.0005098735819046851, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013377360883168876, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 805.4766235351562, "completions/mean_terminated_length": 543.8204956054688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 6.876967930029155, "grad_norm": 0.2666328549385071, "learning_rate": 1e-06, "loss": -0.0377, "num_tokens": 418641934.0, "reward": 0.6707589626312256, "reward_std": 0.15349483489990234, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 736 }, { "clip_ratio/high_max": 0.0012388359282340389, "clip_ratio/high_mean": 0.00039999733917284175, "clip_ratio/low_mean": 0.00034508345288486453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007450808043358847, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3093.0, "completions/mean_length": 754.3158569335938, "completions/mean_terminated_length": 523.0298461914062, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 6.886297376093294, "grad_norm": 0.19998709857463837, "learning_rate": 1e-06, "loss": -0.0192, "num_tokens": 419179249.0, "reward": 0.6517857313156128, "reward_std": 0.11419376730918884, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 737 }, { "clip_ratio/high_max": 0.0018614743385114707, "clip_ratio/high_mean": 0.0006670547882094979, "clip_ratio/low_mean": 0.0005492903319463949, "clip_ratio/low_min": 1.2512512512330431e-05, "clip_ratio/region_mean": 0.0012163451392552815, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2785.0, "completions/mean_length": 967.966552734375, "completions/mean_terminated_length": 583.822021484375, "completions/min_length": 184.0, "completions/min_terminated_length": 184.0, "epoch": 6.895626822157435, "grad_norm": 0.19895361363887787, "learning_rate": 1e-06, "loss": -0.0415, "num_tokens": 419739211.0, "reward": 0.5725446939468384, "reward_std": 0.1481582224369049, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 738 }, { "clip_ratio/high_max": 0.0014793432565056719, "clip_ratio/high_mean": 0.0005714016688216361, "clip_ratio/low_mean": 0.0005917988237342797, "clip_ratio/low_min": 3.0882758437655866e-05, "clip_ratio/region_mean": 0.0011632004916464211, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3461.0, "completions/mean_length": 876.9297485351562, "completions/mean_terminated_length": 517.4801635742188, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 6.904956268221574, "grad_norm": 0.2275323122739792, "learning_rate": 1e-06, "loss": -0.0231, "num_tokens": 420237484.0, "reward": 0.6473214626312256, "reward_std": 0.1413978785276413, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 739 }, { "clip_ratio/high_max": 0.001665983862039866, "clip_ratio/high_mean": 0.00047589814585080603, "clip_ratio/low_mean": 0.0003746619590856426, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008505601072101854, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2560.0, "completions/mean_length": 907.9219360351562, "completions/mean_terminated_length": 547.5303955078125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 6.914285714285715, "grad_norm": 0.2475191056728363, "learning_rate": 1e-06, "loss": -0.0283, "num_tokens": 420772646.0, "reward": 0.5870535969734192, "reward_std": 0.12031038105487823, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 740 }, { "clip_ratio/high_max": 0.0019404951526666991, "clip_ratio/high_mean": 0.0007807975161995273, "clip_ratio/low_mean": 0.00048363557652919553, "clip_ratio/low_min": 3.6296260077506304e-05, "clip_ratio/region_mean": 0.0012644331000046805, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3166.0, "completions/mean_length": 792.4933471679688, "completions/mean_terminated_length": 516.8682250976562, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 6.923615160349854, "grad_norm": 0.286824107170105, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 421280584.0, "reward": 0.6662946939468384, "reward_std": 0.17228437960147858, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 741 }, { "clip_ratio/high_max": 0.0018482838677300606, "clip_ratio/high_mean": 0.0005934440705459565, "clip_ratio/low_mean": 0.0006454563026636606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001238900385942543, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3417.0, "completions/mean_length": 798.521240234375, "completions/mean_terminated_length": 536.3120727539062, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 6.932944606413994, "grad_norm": 0.23824188113212585, "learning_rate": 1e-06, "loss": -0.0232, "num_tokens": 421810227.0, "reward": 0.652901828289032, "reward_std": 0.15338465571403503, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 742 }, { "clip_ratio/high_max": 0.001606914273907023, "clip_ratio/high_mean": 0.0005033282220665569, "clip_ratio/low_mean": 0.0003377502998773707, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008410785121668596, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3995.0, "completions/mean_length": 758.1663208007812, "completions/mean_terminated_length": 510.0299987792969, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 6.942274052478134, "grad_norm": 0.5451865792274475, "learning_rate": 1e-06, "loss": -0.023, "num_tokens": 422330560.0, "reward": 0.6495535969734192, "reward_std": 0.11675135046243668, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 743 }, { "clip_ratio/high_max": 0.001633227540878579, "clip_ratio/high_mean": 0.0006699478708469542, "clip_ratio/low_mean": 0.0005339775061656837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012039253997500055, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4063.0, "completions/mean_length": 917.8404541015625, "completions/mean_terminated_length": 567.3370361328125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 6.9516034985422746, "grad_norm": 0.3352295160293579, "learning_rate": 1e-06, "loss": -0.0321, "num_tokens": 422874705.0, "reward": 0.566964328289032, "reward_std": 0.1612718552350998, "rewards/verify_math_reward/mean": 0.5669642686843872, "rewards/verify_math_reward/std": 0.49577224254608154, "step": 744 }, { "clip_ratio/high_max": 0.00205148749955697, "clip_ratio/high_mean": 0.000676715019835683, "clip_ratio/low_mean": 0.000427590067829442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001104305089029367, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 851.4029541015625, "completions/mean_terminated_length": 550.6841430664062, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 6.960932944606414, "grad_norm": 0.22099699079990387, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 423416826.0, "reward": 0.6506696939468384, "reward_std": 0.1401529759168625, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 745 }, { "clip_ratio/high_max": 0.0021962550017633475, "clip_ratio/high_mean": 0.0007704723448114237, "clip_ratio/low_mean": 0.00033543988092787913, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011059122334700078, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3455.0, "completions/mean_length": 812.8326416015625, "completions/mean_terminated_length": 538.9044799804688, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 6.970262390670554, "grad_norm": 0.21213407814502716, "learning_rate": 1e-06, "loss": -0.0406, "num_tokens": 423953420.0, "reward": 0.6796875596046448, "reward_std": 0.14248555898666382, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 746 }, { "clip_ratio/high_max": 0.001903683692944469, "clip_ratio/high_mean": 0.000747757512726821, "clip_ratio/low_mean": 0.0005013756072003162, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001249133114470169, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4029.0, "completions/mean_length": 823.5089721679688, "completions/mean_terminated_length": 533.2393798828125, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 6.979591836734694, "grad_norm": 0.2869330942630768, "learning_rate": 1e-06, "loss": -0.0155, "num_tokens": 424478788.0, "reward": 0.6417410969734192, "reward_std": 0.15575045347213745, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 747 }, { "clip_ratio/high_max": 0.0017768329671525862, "clip_ratio/high_mean": 0.0005946480705460999, "clip_ratio/low_mean": 0.0003190028014614654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009136508633673657, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3612.0, "completions/mean_length": 877.8114013671875, "completions/mean_terminated_length": 566.6279296875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 6.988921282798834, "grad_norm": 0.2015768140554428, "learning_rate": 1e-06, "loss": -0.0324, "num_tokens": 425022099.0, "reward": 0.6852678656578064, "reward_std": 0.11419377475976944, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 748 }, { "clip_ratio/high_max": 0.0020705315000668634, "clip_ratio/high_mean": 0.0007473792957171099, "clip_ratio/low_mean": 0.0005184333222132409, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001265812614292372, "completions/clipped_ratio": 0.08806818181818177, "completions/max_length": 4096.0, "completions/max_terminated_length": 2477.0, "completions/mean_length": 816.102294921875, "completions/mean_terminated_length": 499.3520202636719, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 6.998250728862974, "grad_norm": 0.21387995779514313, "learning_rate": 1e-06, "loss": -0.0716, "num_tokens": 425536239.0, "reward": 0.574776828289032, "reward_std": 0.15417632460594177, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 749 }, { "clip_ratio/high_max": 0.00155554620505427, "clip_ratio/high_mean": 0.0005096999029774452, "clip_ratio/low_mean": 0.0005069395642749441, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010166394840780413, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2647.0, "completions/mean_length": 826.6506958007812, "completions/mean_terminated_length": 523.6378173828125, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 7.0093294460641395, "grad_norm": 0.2210661917924881, "learning_rate": 1e-06, "loss": -0.0454, "num_tokens": 426061510.0, "reward": 0.6339285969734192, "reward_std": 0.14815637469291687, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 750 }, { "clip_ratio/high_max": 0.0018054342217510566, "clip_ratio/high_mean": 0.0006222102547326358, "clip_ratio/low_mean": 0.00036785796783078695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009900682234729175, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3571.0, "completions/mean_length": 822.5391235351562, "completions/mean_terminated_length": 532.1834716796875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 7.01865889212828, "grad_norm": 0.21002645790576935, "learning_rate": 1e-06, "loss": -0.0197, "num_tokens": 426590681.0, "reward": 0.6908482313156128, "reward_std": 0.1256142258644104, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 751 }, { "clip_ratio/high_max": 0.0018424143236188684, "clip_ratio/high_mean": 0.000759250679948309, "clip_ratio/low_mean": 0.00038130590655782726, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001140556614700472, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 920.7332763671875, "completions/mean_terminated_length": 548.56982421875, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 7.0279883381924195, "grad_norm": 0.22753813862800598, "learning_rate": 1e-06, "loss": -0.0525, "num_tokens": 427114946.0, "reward": 0.652901828289032, "reward_std": 0.14669284224510193, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 752 }, { "clip_ratio/high_max": 0.0020993458601878956, "clip_ratio/high_mean": 0.0006957223295103176, "clip_ratio/low_mean": 0.0003861168702314899, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010818392147484701, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 905.4498291015625, "completions/mean_terminated_length": 571.0517578125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.03731778425656, "grad_norm": 0.23804092407226562, "learning_rate": 1e-06, "loss": -0.0381, "num_tokens": 427663725.0, "reward": 0.6350446939468384, "reward_std": 0.13549810647964478, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 753 }, { "clip_ratio/high_max": 0.0021927592824795283, "clip_ratio/high_mean": 0.0008181643352145329, "clip_ratio/low_mean": 0.0005232930584497808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013414574059424922, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 884.075927734375, "completions/mean_terminated_length": 551.807861328125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.0466472303206995, "grad_norm": 0.4311201572418213, "learning_rate": 1e-06, "loss": -0.023, "num_tokens": 428198697.0, "reward": 0.640625, "reward_std": 0.15567448735237122, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 754 }, { "clip_ratio/high_max": 0.001530787460069405, "clip_ratio/high_mean": 0.0005595842558250297, "clip_ratio/low_mean": 0.00039646024833928095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009560445068927947, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3343.0, "completions/mean_length": 801.9207763671875, "completions/mean_terminated_length": 557.0371704101562, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 7.05597667638484, "grad_norm": 0.20450225472450256, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 428763946.0, "reward": 0.645089328289032, "reward_std": 0.1300102025270462, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 755 }, { "clip_ratio/high_max": 0.0017400143115082756, "clip_ratio/high_mean": 0.0006631210417253897, "clip_ratio/low_mean": 0.0003824581331173249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001045579178025946, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3717.0, "completions/mean_length": 849.6484985351562, "completions/mean_terminated_length": 565.9866333007812, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 7.0653061224489795, "grad_norm": 0.21997500956058502, "learning_rate": 1e-06, "loss": -0.0276, "num_tokens": 429319391.0, "reward": 0.660714328289032, "reward_std": 0.13842590153217316, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313389778137, "step": 756 }, { "clip_ratio/high_max": 0.0018967443211295176, "clip_ratio/high_mean": 0.000704463614965789, "clip_ratio/low_mean": 0.0002965810149362369, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010010446276282892, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3225.0, "completions/mean_length": 846.357177734375, "completions/mean_terminated_length": 505.7657165527344, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 7.07463556851312, "grad_norm": 1.8910417556762695, "learning_rate": 1e-06, "loss": -0.0454, "num_tokens": 429816807.0, "reward": 0.6640625, "reward_std": 0.1328292191028595, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 757 }, { "clip_ratio/high_max": 0.001921982580824988, "clip_ratio/high_mean": 0.0006678210411337204, "clip_ratio/low_mean": 0.0003846672611871327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010524883018661058, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 838.3225708007812, "completions/mean_terminated_length": 527.6882934570312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 7.0839650145772595, "grad_norm": 0.2775750160217285, "learning_rate": 1e-06, "loss": -0.023, "num_tokens": 430336448.0, "reward": 0.6886160969734192, "reward_std": 0.1339571624994278, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 758 }, { "clip_ratio/high_max": 0.001697431940556271, "clip_ratio/high_mean": 0.0005855698491359362, "clip_ratio/low_mean": 0.0004389806065319135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010245504636259284, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3409.0, "completions/mean_length": 816.7388916015625, "completions/mean_terminated_length": 525.8687744140625, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 7.093294460641399, "grad_norm": 0.25227269530296326, "learning_rate": 1e-06, "loss": -0.041, "num_tokens": 430861438.0, "reward": 0.6506696939468384, "reward_std": 0.15138980746269226, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 759 }, { "clip_ratio/high_max": 0.001748860981024336, "clip_ratio/high_mean": 0.0007294713250303175, "clip_ratio/low_mean": 0.0003953832233491994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011248545670241583, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4091.0, "completions/mean_length": 818.3225708007812, "completions/mean_terminated_length": 570.4309692382812, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 7.1026239067055394, "grad_norm": 0.20775267481803894, "learning_rate": 1e-06, "loss": -0.0319, "num_tokens": 431428527.0, "reward": 0.6651785969734192, "reward_std": 0.1403031200170517, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 760 }, { "clip_ratio/high_max": 0.0017372585971315857, "clip_ratio/high_mean": 0.0005428749236671138, "clip_ratio/low_mean": 0.00031455166845262283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008574265866627684, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 858.169677734375, "completions/mean_terminated_length": 536.3729858398438, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 7.111953352769679, "grad_norm": 0.18922269344329834, "learning_rate": 1e-06, "loss": -0.0222, "num_tokens": 431948399.0, "reward": 0.6595982313156128, "reward_std": 0.10941943526268005, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 761 }, { "clip_ratio/high_max": 0.0013112820088281296, "clip_ratio/high_mean": 0.0005209065923281742, "clip_ratio/low_mean": 0.00040278148708239314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009236880778189516, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3489.0, "completions/mean_length": 967.7410888671875, "completions/mean_terminated_length": 583.5689086914062, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 7.121282798833819, "grad_norm": 0.18740127980709076, "learning_rate": 1e-06, "loss": -0.0377, "num_tokens": 432504431.0, "reward": 0.6049107313156128, "reward_std": 0.11501862108707428, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 762 }, { "clip_ratio/high_max": 0.0021683051927539054, "clip_ratio/high_mean": 0.0006840880923846271, "clip_ratio/low_mean": 0.00042543773088254966, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011095258250861662, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2823.0, "completions/mean_length": 865.8281860351562, "completions/mean_terminated_length": 557.816650390625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 7.130612244897959, "grad_norm": 0.2093333750963211, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 433055477.0, "reward": 0.6484375, "reward_std": 0.14053022861480713, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 763 }, { "clip_ratio/high_max": 0.0018374361170572229, "clip_ratio/high_mean": 0.0006714117116644047, "clip_ratio/low_mean": 0.0004735927745969093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011450044767116196, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2586.0, "completions/mean_length": 768.7935791015625, "completions/mean_terminated_length": 546.9797973632812, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 7.139941690962099, "grad_norm": 0.2233939915895462, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 433595140.0, "reward": 0.6830357313156128, "reward_std": 0.13519318401813507, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 764 }, { "clip_ratio/high_max": 0.0017288717081100913, "clip_ratio/high_mean": 0.0006363531338138273, "clip_ratio/low_mean": 0.0005515497514352319, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001187902864330681, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 944.1250610351562, "completions/mean_terminated_length": 574.7032470703125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 7.149271137026239, "grad_norm": 0.2202746421098709, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 434155524.0, "reward": 0.6238839626312256, "reward_std": 0.1420365571975708, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 765 }, { "clip_ratio/high_max": 0.0018139493549824692, "clip_ratio/high_mean": 0.0006565253888766165, "clip_ratio/low_mean": 0.0001985701794637862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008550955626560608, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2983.0, "completions/mean_length": 957.060302734375, "completions/mean_terminated_length": 504.0587463378906, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 7.158600583090379, "grad_norm": 0.2239382266998291, "learning_rate": 1e-06, "loss": -0.0501, "num_tokens": 434633154.0, "reward": 0.676339328289032, "reward_std": 0.10690322518348694, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335687637329, "step": 766 }, { "clip_ratio/high_max": 0.001899087154015433, "clip_ratio/high_mean": 0.000682533589497325, "clip_ratio/low_mean": 0.0005771335800091038, "clip_ratio/low_min": 1.577884358994197e-05, "clip_ratio/region_mean": 0.0012596671549545135, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3301.0, "completions/mean_length": 927.075927734375, "completions/mean_terminated_length": 555.6558837890625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 7.167930029154519, "grad_norm": 0.26125797629356384, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 435172174.0, "reward": 0.6071428656578064, "reward_std": 0.15390713512897491, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 767 }, { "clip_ratio/high_max": 0.0013509400996554177, "clip_ratio/high_mean": 0.0005508841413757182, "clip_ratio/low_mean": 0.0004875524605267856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010384366250946186, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3377.0, "completions/mean_length": 918.5803833007812, "completions/mean_terminated_length": 519.4070434570312, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 7.1772594752186585, "grad_norm": 0.22931401431560516, "learning_rate": 1e-06, "loss": -0.0156, "num_tokens": 435671702.0, "reward": 0.6696428656578064, "reward_std": 0.1274154782295227, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 768 }, { "clip_ratio/high_max": 0.0018453910488460679, "clip_ratio/high_mean": 0.0006329586622086936, "clip_ratio/low_mean": 0.0005600428394245682, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011930015025427565, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2554.0, "completions/mean_length": 822.0335083007812, "completions/mean_terminated_length": 553.156982421875, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 7.186588921282799, "grad_norm": 0.6465548276901245, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 436217756.0, "reward": 0.6785714626312256, "reward_std": 0.15398016571998596, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 769 }, { "clip_ratio/high_max": 0.0020515040305326693, "clip_ratio/high_mean": 0.0007472602846974041, "clip_ratio/low_mean": 0.0003975188401454943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011447791184764355, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2206.0, "completions/mean_length": 897.200927734375, "completions/mean_terminated_length": 535.5975341796875, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 7.1959183673469385, "grad_norm": 0.2725692093372345, "learning_rate": 1e-06, "loss": -0.0464, "num_tokens": 436741432.0, "reward": 0.6584821939468384, "reward_std": 0.14414341747760773, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 770 }, { "clip_ratio/high_max": 0.0015428995538968593, "clip_ratio/high_mean": 0.0005118942772242008, "clip_ratio/low_mean": 0.0004454997917946457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009573940587870311, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 975.9464721679688, "completions/mean_terminated_length": 583.9799194335938, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 7.205247813411079, "grad_norm": 0.18831664323806763, "learning_rate": 1e-06, "loss": -0.0084, "num_tokens": 437291712.0, "reward": 0.59375, "reward_std": 0.1335471272468567, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 771 }, { "clip_ratio/high_max": 0.0021033159573562443, "clip_ratio/high_mean": 0.0007534949218097609, "clip_ratio/low_mean": 0.00028411590278665244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010376108239142923, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3738.0, "completions/mean_length": 931.1317138671875, "completions/mean_terminated_length": 529.0540771484375, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 7.214577259475218, "grad_norm": 0.246206596493721, "learning_rate": 1e-06, "loss": -0.0473, "num_tokens": 437805702.0, "reward": 0.6540178656578064, "reward_std": 0.13696163892745972, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 772 }, { "clip_ratio/high_max": 0.002066510023723822, "clip_ratio/high_mean": 0.0007286792097147554, "clip_ratio/low_mean": 0.0004168878567725187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011455670464783907, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1846.0, "completions/mean_length": 883.2199096679688, "completions/mean_terminated_length": 479.6042785644531, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 7.223906705539359, "grad_norm": 0.2433871030807495, "learning_rate": 1e-06, "loss": -0.0467, "num_tokens": 438282347.0, "reward": 0.6082589626312256, "reward_std": 0.15341675281524658, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 773 }, { "clip_ratio/high_max": 0.001539781667815987, "clip_ratio/high_mean": 0.0006261663638724713, "clip_ratio/low_mean": 0.0004075622259733791, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010337285821151454, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3905.0, "completions/mean_length": 826.3995971679688, "completions/mean_terminated_length": 557.8816528320312, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 7.233236151603498, "grad_norm": 0.5587377548217773, "learning_rate": 1e-06, "loss": -0.027, "num_tokens": 438836089.0, "reward": 0.613839328289032, "reward_std": 0.14560766518115997, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 774 }, { "clip_ratio/high_max": 0.001900513940199744, "clip_ratio/high_mean": 0.0005801326587970834, "clip_ratio/low_mean": 0.0002264898992052622, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008066225582297193, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3882.0, "completions/mean_length": 881.1138916015625, "completions/mean_terminated_length": 578.859619140625, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 7.242565597667639, "grad_norm": 0.19209402799606323, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 439401463.0, "reward": 0.6886160969734192, "reward_std": 0.12181740999221802, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 775 }, { "clip_ratio/high_max": 0.0018584589997772127, "clip_ratio/high_mean": 0.0005739138869103044, "clip_ratio/low_mean": 0.0003227495324154006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008966634013631847, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2293.0, "completions/mean_length": 912.6004638671875, "completions/mean_terminated_length": 565.8935546875, "completions/min_length": 187.0, "completions/min_terminated_length": 187.0, "epoch": 7.251895043731778, "grad_norm": 0.18052180111408234, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 439944689.0, "reward": 0.6328125, "reward_std": 0.12155778706073761, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 776 }, { "clip_ratio/high_max": 0.0018825598635885399, "clip_ratio/high_mean": 0.0007097283178154612, "clip_ratio/low_mean": 0.0003853289053949993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010950572395813651, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2795.0, "completions/mean_length": 806.2221069335938, "completions/mean_terminated_length": 531.742431640625, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 7.261224489795918, "grad_norm": 0.19756871461868286, "learning_rate": 1e-06, "loss": -0.0236, "num_tokens": 440466704.0, "reward": 0.7220982313156128, "reward_std": 0.13707223534584045, "rewards/verify_math_reward/mean": 0.7220982313156128, "rewards/verify_math_reward/std": 0.44821491837501526, "step": 777 }, { "clip_ratio/high_max": 0.0020003035642730538, "clip_ratio/high_mean": 0.0007616958573635202, "clip_ratio/low_mean": 0.0003803055578828207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011420014379837085, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2329.0, "completions/mean_length": 822.8292846679688, "completions/mean_terminated_length": 566.8050537109375, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 7.270553935860058, "grad_norm": 0.19197532534599304, "learning_rate": 1e-06, "loss": -0.0256, "num_tokens": 441023191.0, "reward": 0.6473214626312256, "reward_std": 0.144252210855484, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807061672210693, "step": 778 }, { "clip_ratio/high_max": 0.0019053590112889651, "clip_ratio/high_mean": 0.0007509957613365259, "clip_ratio/low_mean": 0.0005742122648371151, "clip_ratio/low_min": 2.489048165443819e-05, "clip_ratio/region_mean": 0.0013252080025267787, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3273.0, "completions/mean_length": 1045.7098388671875, "completions/mean_terminated_length": 605.5018920898438, "completions/min_length": 193.0, "completions/min_terminated_length": 193.0, "epoch": 7.279883381924198, "grad_norm": 0.2804083526134491, "learning_rate": 1e-06, "loss": -0.0394, "num_tokens": 441589835.0, "reward": 0.5714285969734192, "reward_std": 0.18134015798568726, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 779 }, { "clip_ratio/high_max": 0.002064025196887087, "clip_ratio/high_mean": 0.000743654773941671, "clip_ratio/low_mean": 0.00040148734206013614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001145142101449892, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 797.536865234375, "completions/mean_terminated_length": 539.5343017578125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.289212827988338, "grad_norm": 0.22385987639427185, "learning_rate": 1e-06, "loss": -0.0288, "num_tokens": 442126412.0, "reward": 0.652901828289032, "reward_std": 0.13655048608779907, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 780 }, { "clip_ratio/high_max": 0.0018116132778231986, "clip_ratio/high_mean": 0.0006781190586480079, "clip_ratio/low_mean": 0.00033917441101039003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001017293468976277, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2908.0, "completions/mean_length": 803.5067138671875, "completions/mean_terminated_length": 528.8004760742188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 7.298542274052478, "grad_norm": 0.2284812480211258, "learning_rate": 1e-06, "loss": -0.0501, "num_tokens": 442646034.0, "reward": 0.6875000596046448, "reward_std": 0.12892432510852814, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 781 }, { "clip_ratio/high_max": 0.0020614859968191013, "clip_ratio/high_mean": 0.0007707177501288243, "clip_ratio/low_mean": 0.00039828915396356024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011690069222822785, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3631.0, "completions/mean_length": 1049.9263916015625, "completions/mean_terminated_length": 578.884033203125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.307871720116618, "grad_norm": 0.23519906401634216, "learning_rate": 1e-06, "loss": -0.0567, "num_tokens": 443188656.0, "reward": 0.5881696939468384, "reward_std": 0.15680059790611267, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924396276473999, "step": 782 }, { "clip_ratio/high_max": 0.0014961940578359645, "clip_ratio/high_mean": 0.0004797290375790908, "clip_ratio/low_mean": 0.0005081013705421356, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009878304026642581, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 857.5714721679688, "completions/mean_terminated_length": 548.7726440429688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 7.317201166180758, "grad_norm": 0.20529448986053467, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 443722896.0, "reward": 0.668526828289032, "reward_std": 0.12207955121994019, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 783 }, { "clip_ratio/high_max": 0.002317278296686709, "clip_ratio/high_mean": 0.0008041735609367606, "clip_ratio/low_mean": 0.0005017438325012336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013059173616056796, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3539.0, "completions/mean_length": 872.6105346679688, "completions/mean_terminated_length": 525.9666137695312, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 7.326530612244898, "grad_norm": 0.21070453524589539, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 444238347.0, "reward": 0.6774553656578064, "reward_std": 0.14222341775894165, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 784 }, { "clip_ratio/high_max": 0.0013067056206637062, "clip_ratio/high_mean": 0.0004163580815657042, "clip_ratio/low_mean": 0.00034855551939472207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007649136241525412, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2023.0, "completions/mean_length": 766.2433471679688, "completions/mean_terminated_length": 540.0262451171875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 7.335860058309038, "grad_norm": 0.5768417119979858, "learning_rate": 1e-06, "loss": -0.0227, "num_tokens": 444773069.0, "reward": 0.6651785969734192, "reward_std": 0.115808866918087, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 785 }, { "clip_ratio/high_max": 0.0016020918628782965, "clip_ratio/high_mean": 0.0005133051927259658, "clip_ratio/low_mean": 0.00039374745938403066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009070526430150494, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2951.0, "completions/mean_length": 872.4241333007812, "completions/mean_terminated_length": 543.325927734375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.345189504373177, "grad_norm": 0.21419286727905273, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 445304777.0, "reward": 0.598214328289032, "reward_std": 0.12823893129825592, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 786 }, { "clip_ratio/high_max": 0.0018684573806240223, "clip_ratio/high_mean": 0.0007049983287288342, "clip_ratio/low_mean": 0.00036744167391589144, "clip_ratio/low_min": 2.0128823962295428e-05, "clip_ratio/region_mean": 0.0010724400308390614, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3301.0, "completions/mean_length": 938.7600708007812, "completions/mean_terminated_length": 528.6771850585938, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 7.354518950437318, "grad_norm": 0.22720560431480408, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 445804274.0, "reward": 0.6261160969734192, "reward_std": 0.13316552340984344, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 787 }, { "clip_ratio/high_max": 0.0017978480973397382, "clip_ratio/high_mean": 0.0006723729893565178, "clip_ratio/low_mean": 0.0003866412009756459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010590141791908536, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3350.0, "completions/mean_length": 880.50341796875, "completions/mean_terminated_length": 582.481689453125, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 7.363848396501457, "grad_norm": 0.21505652368068695, "learning_rate": 1e-06, "loss": -0.0422, "num_tokens": 446377477.0, "reward": 0.6584821939468384, "reward_std": 0.15067441761493683, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 788 }, { "clip_ratio/high_max": 0.0020738798266393133, "clip_ratio/high_mean": 0.0007104585038177902, "clip_ratio/low_mean": 0.0003293472329914948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010398057293059537, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3430.0, "completions/mean_length": 868.763427734375, "completions/mean_terminated_length": 539.29150390625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 7.373177842565598, "grad_norm": 0.41033071279525757, "learning_rate": 1e-06, "loss": -0.0318, "num_tokens": 446901761.0, "reward": 0.6819196939468384, "reward_std": 0.13361060619354248, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 789 }, { "clip_ratio/high_max": 0.001817087919334881, "clip_ratio/high_mean": 0.0007647078982699895, "clip_ratio/low_mean": 0.00035661983656609664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011213277393835597, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 720.2645263671875, "completions/mean_terminated_length": 529.1851196289062, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.382507288629737, "grad_norm": 0.21566364169120789, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 447440510.0, "reward": 0.7020089626312256, "reward_std": 0.15439385175704956, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 790 }, { "clip_ratio/high_max": 0.0015683761339460034, "clip_ratio/high_mean": 0.0005569472532442887, "clip_ratio/low_mean": 0.00030527737999364035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008622246423328761, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3516.0, "completions/mean_length": 776.7299194335938, "completions/mean_terminated_length": 529.9736328125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 7.391836734693878, "grad_norm": 0.18326812982559204, "learning_rate": 1e-06, "loss": -0.0338, "num_tokens": 447973084.0, "reward": 0.7053571939468384, "reward_std": 0.11283689737319946, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 791 }, { "clip_ratio/high_max": 0.002036717880400829, "clip_ratio/high_mean": 0.0006804739823564887, "clip_ratio/low_mean": 0.0003834617098164017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010639356914907694, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1847.0, "completions/mean_length": 742.9263916015625, "completions/mean_terminated_length": 493.6570739746094, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 7.401166180758017, "grad_norm": 0.26743683218955994, "learning_rate": 1e-06, "loss": -0.0163, "num_tokens": 448482466.0, "reward": 0.6752232313156128, "reward_std": 0.1305733323097229, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 792 }, { "clip_ratio/high_max": 0.0018211142196378205, "clip_ratio/high_mean": 0.0006659587324975291, "clip_ratio/low_mean": 0.00044132539369456936, "clip_ratio/low_min": 1.855149821494706e-05, "clip_ratio/region_mean": 0.0011072841407440137, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2224.0, "completions/mean_length": 843.2824096679688, "completions/mean_terminated_length": 520.006103515625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.410495626822158, "grad_norm": 0.2430814653635025, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 448995319.0, "reward": 0.7064732313156128, "reward_std": 0.1288815438747406, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 793 }, { "clip_ratio/high_max": 0.00199321500986116, "clip_ratio/high_mean": 0.0007223097145470092, "clip_ratio/low_mean": 0.00025829350238382176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009806032066990156, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3297.0, "completions/mean_length": 763.2600708007812, "completions/mean_terminated_length": 557.9253540039062, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 7.419825072886297, "grad_norm": 0.19316665828227997, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 449552056.0, "reward": 0.7064732313156128, "reward_std": 0.13301467895507812, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 794 }, { "clip_ratio/high_max": 0.0015548774172202684, "clip_ratio/high_mean": 0.0005065612785983831, "clip_ratio/low_mean": 0.00033220593707028456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008387672187382123, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3267.0, "completions/mean_length": 790.0792846679688, "completions/mean_terminated_length": 527.1987915039062, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 7.429154518950437, "grad_norm": 0.25985169410705566, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 450069903.0, "reward": 0.7087053656578064, "reward_std": 0.11532352864742279, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 795 }, { "clip_ratio/high_max": 0.001736435733619146, "clip_ratio/high_mean": 0.0006576968880835921, "clip_ratio/low_mean": 0.0004766974152516923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001134394304244779, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3986.0, "completions/mean_length": 1004.1239013671875, "completions/mean_terminated_length": 598.1199340820312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.438483965014577, "grad_norm": 0.21131226420402527, "learning_rate": 1e-06, "loss": -0.0365, "num_tokens": 450643230.0, "reward": 0.5479910969734192, "reward_std": 0.1590908169746399, "rewards/verify_math_reward/mean": 0.5479910969734192, "rewards/verify_math_reward/std": 0.49796950817108154, "step": 796 }, { "clip_ratio/high_max": 0.0021376903823693283, "clip_ratio/high_mean": 0.0009354207468277309, "clip_ratio/low_mean": 0.0005537491324503208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001489169902924914, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2331.0, "completions/mean_length": 889.2578735351562, "completions/mean_terminated_length": 574.871337890625, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 7.447813411078717, "grad_norm": 0.2466975450515747, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 451199261.0, "reward": 0.6194196939468384, "reward_std": 0.1874253898859024, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 797 }, { "clip_ratio/high_max": 0.0018289342988282442, "clip_ratio/high_mean": 0.0007239878341351869, "clip_ratio/low_mean": 0.0005331186221155804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012571064726216719, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2967.0, "completions/mean_length": 897.7444458007812, "completions/mean_terminated_length": 566.890380859375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 7.457142857142857, "grad_norm": 0.2551487684249878, "learning_rate": 1e-06, "loss": -0.0424, "num_tokens": 451755744.0, "reward": 0.6662946939468384, "reward_std": 0.15687981247901917, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 798 }, { "clip_ratio/high_max": 0.0015417725917359348, "clip_ratio/high_mean": 0.0005219379295340332, "clip_ratio/low_mean": 0.00035861645937984576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008805543911876157, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3499.0, "completions/mean_length": 890.4297485351562, "completions/mean_terminated_length": 589.0513305664062, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 7.466472303206997, "grad_norm": 0.1948302984237671, "learning_rate": 1e-06, "loss": -0.0366, "num_tokens": 452329833.0, "reward": 0.6707589626312256, "reward_std": 0.12982404232025146, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 799 }, { "clip_ratio/high_max": 0.0016610091115580872, "clip_ratio/high_mean": 0.0004783436525030993, "clip_ratio/low_mean": 0.0002772621162421274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007556057662441162, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3150.0, "completions/mean_length": 820.6082763671875, "completions/mean_terminated_length": 551.61474609375, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 7.475801749271137, "grad_norm": 0.17610272765159607, "learning_rate": 1e-06, "loss": -0.0197, "num_tokens": 452877802.0, "reward": 0.6785714626312256, "reward_std": 0.09923569113016129, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 800 }, { "clip_ratio/high_max": 0.0026171008648816496, "clip_ratio/high_mean": 0.0009410387829120737, "clip_ratio/low_mean": 0.00043235775774519425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013733965461142361, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3584.0, "completions/mean_length": 925.06591796875, "completions/mean_terminated_length": 544.5537109375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.485131195335277, "grad_norm": 0.25882914662361145, "learning_rate": 1e-06, "loss": -0.0623, "num_tokens": 453396245.0, "reward": 0.6395089626312256, "reward_std": 0.17044779658317566, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 801 }, { "clip_ratio/high_max": 0.0019658309865917545, "clip_ratio/high_mean": 0.0007635818656126503, "clip_ratio/low_mean": 0.00036012548162034363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011237073413212784, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2235.0, "completions/mean_length": 981.7266235351562, "completions/mean_terminated_length": 577.2244873046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.494460641399417, "grad_norm": 0.20995590090751648, "learning_rate": 1e-06, "loss": -0.0636, "num_tokens": 453938864.0, "reward": 0.6328125, "reward_std": 0.1460520476102829, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 802 }, { "clip_ratio/high_max": 0.0014598001725971699, "clip_ratio/high_mean": 0.00042861368888225115, "clip_ratio/low_mean": 0.0003237422939719181, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007523559579567518, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3671.0, "completions/mean_length": 865.9721069335938, "completions/mean_terminated_length": 562.2943115234375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 7.503790087463557, "grad_norm": 0.18857206404209137, "learning_rate": 1e-06, "loss": -0.0388, "num_tokens": 454494055.0, "reward": 0.6640625, "reward_std": 0.11599250137805939, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 803 }, { "clip_ratio/high_max": 0.0016289376617351081, "clip_ratio/high_mean": 0.0006079857830627589, "clip_ratio/low_mean": 0.0005249290843494236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011329148619552143, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3022.0, "completions/mean_length": 866.6038208007812, "completions/mean_terminated_length": 528.1343994140625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 7.513119533527696, "grad_norm": 0.2475336641073227, "learning_rate": 1e-06, "loss": -0.0202, "num_tokens": 455014212.0, "reward": 0.6339285969734192, "reward_std": 0.1577039510011673, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 804 }, { "clip_ratio/high_max": 0.0020908709229843225, "clip_ratio/high_mean": 0.0008554086743970402, "clip_ratio/low_mean": 0.0005937854139119736, "clip_ratio/low_min": 1.40860938699916e-05, "clip_ratio/region_mean": 0.0014491941037704237, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3846.0, "completions/mean_length": 871.6752319335938, "completions/mean_terminated_length": 546.8660888671875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 7.522448979591837, "grad_norm": 0.22647230327129364, "learning_rate": 1e-06, "loss": -0.0596, "num_tokens": 455544513.0, "reward": 0.6462053656578064, "reward_std": 0.19069157540798187, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 805 }, { "clip_ratio/high_max": 0.0018042797473754035, "clip_ratio/high_mean": 0.0006075592536944896, "clip_ratio/low_mean": 0.0003418043121428127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000949363580730278, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2748.0, "completions/mean_length": 870.6473388671875, "completions/mean_terminated_length": 563.0953979492188, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 7.531778425655976, "grad_norm": 0.21548768877983093, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 456084413.0, "reward": 0.668526828289032, "reward_std": 0.11956292390823364, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 806 }, { "clip_ratio/high_max": 0.002055928751360625, "clip_ratio/high_mean": 0.0005337901384336874, "clip_ratio/low_mean": 0.0005100326070532901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001043822765495861, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3652.0, "completions/mean_length": 1036.1842041015625, "completions/mean_terminated_length": 558.4580688476562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 7.541107871720117, "grad_norm": 0.21702860295772552, "learning_rate": 1e-06, "loss": -0.0457, "num_tokens": 456604858.0, "reward": 0.5870535969734192, "reward_std": 0.1310279816389084, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 807 }, { "clip_ratio/high_max": 0.0015766515225550393, "clip_ratio/high_mean": 0.0005313608567121264, "clip_ratio/low_mean": 0.0003719614910551172, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009033223341248231, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3162.0, "completions/mean_length": 908.2489013671875, "completions/mean_terminated_length": 608.5458374023438, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.550437317784256, "grad_norm": 0.17865093052387238, "learning_rate": 1e-06, "loss": -0.029, "num_tokens": 457182649.0, "reward": 0.5993303656578064, "reward_std": 0.12039663642644882, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 808 }, { "clip_ratio/high_max": 0.0019642376792035066, "clip_ratio/high_mean": 0.0008519805787727819, "clip_ratio/low_mean": 0.0005484221051119675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014004026488692034, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2262.0, "completions/mean_length": 874.9219360351562, "completions/mean_terminated_length": 593.4684448242188, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 7.559766763848397, "grad_norm": 0.3716540038585663, "learning_rate": 1e-06, "loss": -0.041, "num_tokens": 457759491.0, "reward": 0.6316964626312256, "reward_std": 0.1893431544303894, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 809 }, { "clip_ratio/high_max": 0.002034013294178294, "clip_ratio/high_mean": 0.0006746804865542799, "clip_ratio/low_mean": 0.00048705783683544723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001161738320661243, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 879.8125610351562, "completions/mean_terminated_length": 533.943115234375, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 7.569096209912536, "grad_norm": 0.4034753739833832, "learning_rate": 1e-06, "loss": -0.0239, "num_tokens": 458293491.0, "reward": 0.6462053656578064, "reward_std": 0.13496747612953186, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 810 }, { "clip_ratio/high_max": 0.001661057016463019, "clip_ratio/high_mean": 0.0006062400598239037, "clip_ratio/low_mean": 0.0005257374787106528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011319775512674823, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2568.0, "completions/mean_length": 932.0167846679688, "completions/mean_terminated_length": 583.0768432617188, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 7.578425655976677, "grad_norm": 0.22804972529411316, "learning_rate": 1e-06, "loss": -0.0511, "num_tokens": 458849586.0, "reward": 0.6116071939468384, "reward_std": 0.15026254951953888, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 811 }, { "clip_ratio/high_max": 0.0021547215183090884, "clip_ratio/high_mean": 0.0006679528969470994, "clip_ratio/low_mean": 0.00046255778045178886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011305106745567173, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3086.0, "completions/mean_length": 890.825927734375, "completions/mean_terminated_length": 563.6063842773438, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 7.587755102040816, "grad_norm": 0.2476268708705902, "learning_rate": 1e-06, "loss": -0.0341, "num_tokens": 459400774.0, "reward": 0.6004464626312256, "reward_std": 0.1620645970106125, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 812 }, { "clip_ratio/high_max": 0.0015425645779032493, "clip_ratio/high_mean": 0.0004173754109615402, "clip_ratio/low_mean": 0.0003525760248521692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007699514299019938, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3272.0, "completions/mean_length": 917.65966796875, "completions/mean_terminated_length": 540.702880859375, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 7.597084548104956, "grad_norm": 0.19631144404411316, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 459921333.0, "reward": 0.6875000596046448, "reward_std": 0.11565662175416946, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 813 }, { "clip_ratio/high_max": 0.0020850097862421535, "clip_ratio/high_mean": 0.0006305483966571046, "clip_ratio/low_mean": 0.00039854229316915735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001029090675729094, "completions/clipped_ratio": 0.0647321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1630.0, "completions/mean_length": 711.372802734375, "completions/mean_terminated_length": 477.11456298828125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 7.606413994169096, "grad_norm": 0.20963580906391144, "learning_rate": 1e-06, "loss": -0.0137, "num_tokens": 460410331.0, "reward": 0.7444196939468384, "reward_std": 0.10870447009801865, "rewards/verify_math_reward/mean": 0.7444196343421936, "rewards/verify_math_reward/std": 0.43643057346343994, "step": 814 }, { "clip_ratio/high_max": 0.0014944998920327635, "clip_ratio/high_mean": 0.0004622150722752849, "clip_ratio/low_mean": 0.0004701450361608295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009323601188953035, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2224.0, "completions/mean_length": 961.6875610351562, "completions/mean_terminated_length": 554.5825805664062, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 7.615743440233236, "grad_norm": 0.26445794105529785, "learning_rate": 1e-06, "loss": -0.0436, "num_tokens": 460935651.0, "reward": 0.6194196939468384, "reward_std": 0.13121412694454193, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 815 }, { "clip_ratio/high_max": 0.0018435135825711768, "clip_ratio/high_mean": 0.0006467810808317154, "clip_ratio/low_mean": 0.0004172930639469996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010640741493261885, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3279.0, "completions/mean_length": 885.1495971679688, "completions/mean_terminated_length": 561.69775390625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 7.625072886297376, "grad_norm": 0.20902347564697266, "learning_rate": 1e-06, "loss": -0.0386, "num_tokens": 461479025.0, "reward": 0.6015625, "reward_std": 0.1519550383090973, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 816 }, { "clip_ratio/high_max": 0.001077953122148756, "clip_ratio/high_mean": 0.0003348792638462328, "clip_ratio/low_mean": 0.00040514794545742916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007400272097584093, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3724.0, "completions/mean_length": 860.5000610351562, "completions/mean_terminated_length": 547.642578125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 7.634402332361516, "grad_norm": 0.2130817174911499, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 462012081.0, "reward": 0.629464328289032, "reward_std": 0.11393164098262787, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 817 }, { "clip_ratio/high_max": 0.0017106234954553656, "clip_ratio/high_mean": 0.0006426229138014605, "clip_ratio/low_mean": 0.00048620864708937006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001128831565438304, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 884.247802734375, "completions/mean_terminated_length": 551.9974975585938, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 7.643731778425656, "grad_norm": 0.22409754991531372, "learning_rate": 1e-06, "loss": -0.0487, "num_tokens": 462551719.0, "reward": 0.6160714626312256, "reward_std": 0.1380895972251892, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 818 }, { "clip_ratio/high_max": 0.002189966689911671, "clip_ratio/high_mean": 0.0008116880489978939, "clip_ratio/low_mean": 0.0004735569018521346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012852449472120497, "completions/clipped_ratio": 0.0658482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2802.0, "completions/mean_length": 813.2377319335938, "completions/mean_terminated_length": 581.8363037109375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.653061224489796, "grad_norm": 0.3138972222805023, "learning_rate": 1e-06, "loss": -0.0353, "num_tokens": 463136284.0, "reward": 0.7031250596046448, "reward_std": 0.1385025680065155, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 819 }, { "clip_ratio/high_max": 0.0021604646608466282, "clip_ratio/high_mean": 0.0007709093206358375, "clip_ratio/low_mean": 0.00033970701042562723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011106163328804541, "completions/clipped_ratio": 0.0636160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3915.0, "completions/mean_length": 742.036865234375, "completions/mean_terminated_length": 514.1752319335938, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 7.662390670553936, "grad_norm": 0.32489922642707825, "learning_rate": 1e-06, "loss": -0.0367, "num_tokens": 463650245.0, "reward": 0.7053571939468384, "reward_std": 0.13981597125530243, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 820 }, { "clip_ratio/high_max": 0.0020404564202181064, "clip_ratio/high_mean": 0.0007672004612686578, "clip_ratio/low_mean": 0.0005154649734322447, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001282665405597072, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3377.0, "completions/mean_length": 891.9342041015625, "completions/mean_terminated_length": 569.1658325195312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 7.671720116618076, "grad_norm": 0.21399322152137756, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 464192634.0, "reward": 0.637276828289032, "reward_std": 0.16702328622341156, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 821 }, { "clip_ratio/high_max": 0.0018740105551842134, "clip_ratio/high_mean": 0.0006350987941914354, "clip_ratio/low_mean": 0.0006803349815527326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013154337611922529, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3107.0, "completions/mean_length": 895.1495971679688, "completions/mean_terminated_length": 550.9295654296875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 7.681049562682215, "grad_norm": 0.2542259097099304, "learning_rate": 1e-06, "loss": -0.0367, "num_tokens": 464734632.0, "reward": 0.5993303656578064, "reward_std": 0.13294051587581635, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 822 }, { "clip_ratio/high_max": 0.0019385783598409034, "clip_ratio/high_mean": 0.0007336593480431475, "clip_ratio/low_mean": 0.0005824763429700397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013161357164790388, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 876.8225708007812, "completions/mean_terminated_length": 512.91552734375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 7.690379008746356, "grad_norm": 0.2604391574859619, "learning_rate": 1e-06, "loss": -0.0359, "num_tokens": 465233641.0, "reward": 0.6383928656578064, "reward_std": 0.1440257877111435, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 823 }, { "clip_ratio/high_max": 0.0018905918987002224, "clip_ratio/high_mean": 0.0006724767918058205, "clip_ratio/low_mean": 0.00041785722532949876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001090334051696118, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1778.0, "completions/mean_length": 899.8292846679688, "completions/mean_terminated_length": 534.0982666015625, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 7.699708454810495, "grad_norm": 0.25682181119918823, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 465750000.0, "reward": 0.6127232313156128, "reward_std": 0.1342499852180481, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 824 }, { "clip_ratio/high_max": 0.002026215923251584, "clip_ratio/high_mean": 0.0008233674543589586, "clip_ratio/low_mean": 0.0006635132303927094, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014868807265884243, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3730.0, "completions/mean_length": 883.513427734375, "completions/mean_terminated_length": 568.563720703125, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 7.709037900874636, "grad_norm": 0.24753423035144806, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 466308996.0, "reward": 0.6283482313156128, "reward_std": 0.16386334598064423, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 825 }, { "clip_ratio/high_max": 0.001801125647034496, "clip_ratio/high_mean": 0.0006470480766438413, "clip_ratio/low_mean": 0.00027894574031961383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009259937905881088, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3068.0, "completions/mean_length": 792.7745971679688, "completions/mean_terminated_length": 538.6802978515625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 7.718367346938775, "grad_norm": 0.20461122691631317, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 466851290.0, "reward": 0.6696428656578064, "reward_std": 0.11592015624046326, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 826 }, { "clip_ratio/high_max": 0.0016967389274213929, "clip_ratio/high_mean": 0.0006368712183757452, "clip_ratio/low_mean": 0.00031680445749771025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009536756588204298, "completions/clipped_ratio": 0.0714285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2728.0, "completions/mean_length": 780.2801513671875, "completions/mean_terminated_length": 525.2247924804688, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 7.727696793002916, "grad_norm": 0.275640606880188, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 467383877.0, "reward": 0.6595982313156128, "reward_std": 0.13557478785514832, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 827 }, { "clip_ratio/high_max": 0.0018999132298631594, "clip_ratio/high_mean": 0.00069778775832674, "clip_ratio/low_mean": 0.00032354430095438147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010213320383627433, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 899.1105346679688, "completions/mean_terminated_length": 550.9343872070312, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 7.737026239067055, "grad_norm": 0.21681562066078186, "learning_rate": 1e-06, "loss": -0.0467, "num_tokens": 467921776.0, "reward": 0.5993303656578064, "reward_std": 0.1300095021724701, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 828 }, { "clip_ratio/high_max": 0.0021351910691009834, "clip_ratio/high_mean": 0.0007330894768529106, "clip_ratio/low_mean": 0.0004994228811483481, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012325123716436792, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 853.185302734375, "completions/mean_terminated_length": 504.452392578125, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 7.746355685131196, "grad_norm": 0.2287919819355011, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 468420022.0, "reward": 0.6651785969734192, "reward_std": 0.1442507952451706, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 829 }, { "clip_ratio/high_max": 0.0017939935605681967, "clip_ratio/high_mean": 0.0007073240094541688, "clip_ratio/low_mean": 0.000362676294571429, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001070000318577513, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 1993.0, "completions/mean_length": 869.2031860351562, "completions/mean_terminated_length": 535.3965454101562, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 7.755685131195335, "grad_norm": 0.24683068692684174, "learning_rate": 1e-06, "loss": -0.0401, "num_tokens": 468947580.0, "reward": 0.645089328289032, "reward_std": 0.12925811111927032, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 830 }, { "clip_ratio/high_max": 0.00162812970665982, "clip_ratio/high_mean": 0.000563262915420637, "clip_ratio/low_mean": 0.00025965850636566756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008229214272432728, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2041.0, "completions/mean_length": 989.7600708007812, "completions/mean_terminated_length": 546.011474609375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 7.765014577259475, "grad_norm": 0.24280503392219543, "learning_rate": 1e-06, "loss": -0.0501, "num_tokens": 469462389.0, "reward": 0.65625, "reward_std": 0.1282082498073578, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 831 }, { "clip_ratio/high_max": 0.001739942243148107, "clip_ratio/high_mean": 0.0005105355357954977, "clip_ratio/low_mean": 0.0005892741050956829, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001099809625884518, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 944.52685546875, "completions/mean_terminated_length": 579.5367431640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 7.774344023323615, "grad_norm": 0.22110724449157715, "learning_rate": 1e-06, "loss": -0.0225, "num_tokens": 470009301.0, "reward": 0.5546875, "reward_std": 0.11994129419326782, "rewards/verify_math_reward/mean": 0.5546875, "rewards/verify_math_reward/std": 0.4972778558731079, "step": 832 }, { "clip_ratio/high_max": 0.0019482617572066374, "clip_ratio/high_mean": 0.0007169645268731983, "clip_ratio/low_mean": 0.0006119113368185936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013288758491398767, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1835.0, "completions/mean_length": 755.4375610351562, "completions/mean_terminated_length": 515.6842041015625, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 7.783673469387755, "grad_norm": 0.2862670123577118, "learning_rate": 1e-06, "loss": -0.0185, "num_tokens": 470528605.0, "reward": 0.6629464626312256, "reward_std": 0.14804762601852417, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 833 }, { "clip_ratio/high_max": 0.0015325476088037249, "clip_ratio/high_mean": 0.0005396471333369846, "clip_ratio/low_mean": 0.00030326443584272056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008429115623584948, "completions/clipped_ratio": 0.0613839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2519.0, "completions/mean_length": 737.5167846679688, "completions/mean_terminated_length": 517.8775024414062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 7.793002915451895, "grad_norm": 0.17456020414829254, "learning_rate": 1e-06, "loss": -0.0191, "num_tokens": 471061236.0, "reward": 0.7008928656578064, "reward_std": 0.11757621169090271, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 834 }, { "clip_ratio/high_max": 0.0020878899449598975, "clip_ratio/high_mean": 0.0005363087943806022, "clip_ratio/low_mean": 0.00040220899290943635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009385177891090279, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3485.0, "completions/mean_length": 904.0938110351562, "completions/mean_terminated_length": 556.4603881835938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 7.802332361516035, "grad_norm": 0.1644892543554306, "learning_rate": 1e-06, "loss": -0.0389, "num_tokens": 471603496.0, "reward": 0.6227678656578064, "reward_std": 0.09990689158439636, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644601345062, "step": 835 }, { "clip_ratio/high_max": 0.001960821384273004, "clip_ratio/high_mean": 0.0006039140553184552, "clip_ratio/low_mean": 0.0004193444408429059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010232584754703566, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2880.0, "completions/mean_length": 874.4810791015625, "completions/mean_terminated_length": 523.6224975585938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 7.811661807580175, "grad_norm": 0.20616614818572998, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 472127255.0, "reward": 0.6484375, "reward_std": 0.11956290900707245, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 836 }, { "clip_ratio/high_max": 0.0017060250393114984, "clip_ratio/high_mean": 0.0006282697186179576, "clip_ratio/low_mean": 0.00040576628725830233, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010340360076952493, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 873.2221069335938, "completions/mean_terminated_length": 552.9214477539062, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 7.820991253644315, "grad_norm": 0.21669143438339233, "learning_rate": 1e-06, "loss": -0.0299, "num_tokens": 472663790.0, "reward": 0.65625, "reward_std": 0.1287720799446106, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 837 }, { "clip_ratio/high_max": 0.0015515964187216014, "clip_ratio/high_mean": 0.0006001234341965755, "clip_ratio/low_mean": 0.00032068414611785556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009208075716742314, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3323.0, "completions/mean_length": 842.8527221679688, "completions/mean_terminated_length": 519.5337524414062, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 7.830320699708455, "grad_norm": 0.20231305062770844, "learning_rate": 1e-06, "loss": -0.0297, "num_tokens": 473174354.0, "reward": 0.6540178656578064, "reward_std": 0.13083365559577942, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 838 }, { "clip_ratio/high_max": 0.0021013850200688466, "clip_ratio/high_mean": 0.0007318230218515964, "clip_ratio/low_mean": 0.0004379845299808949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011698075522872387, "completions/clipped_ratio": 0.0535714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1890.0, "completions/mean_length": 706.5067138671875, "completions/mean_terminated_length": 514.6486206054688, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 7.839650145772595, "grad_norm": 0.20463016629219055, "learning_rate": 1e-06, "loss": -0.046, "num_tokens": 473706056.0, "reward": 0.7031250596046448, "reward_std": 0.13485799729824066, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 839 }, { "clip_ratio/high_max": 0.0016366375748475548, "clip_ratio/high_mean": 0.000635799436167872, "clip_ratio/low_mean": 0.00037692594651161926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001012725408145343, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3316.0, "completions/mean_length": 839.6942138671875, "completions/mean_terminated_length": 533.5457763671875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 7.848979591836734, "grad_norm": 0.21159310638904572, "learning_rate": 1e-06, "loss": -0.0604, "num_tokens": 474227910.0, "reward": 0.6395089626312256, "reward_std": 0.1457924246788025, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 840 }, { "clip_ratio/high_max": 0.0020121899869991466, "clip_ratio/high_mean": 0.0008275412565126317, "clip_ratio/low_mean": 0.00043838469673573854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001265925955522107, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3754.0, "completions/mean_length": 757.0011596679688, "completions/mean_terminated_length": 491.4903564453125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 7.858309037900875, "grad_norm": 0.2540360689163208, "learning_rate": 1e-06, "loss": -0.0622, "num_tokens": 474733247.0, "reward": 0.691964328289032, "reward_std": 0.1593957394361496, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 841 }, { "clip_ratio/high_max": 0.0017584055021870881, "clip_ratio/high_mean": 0.0006672156773674942, "clip_ratio/low_mean": 0.000606639578791146, "clip_ratio/low_min": 2.9315197025425732e-05, "clip_ratio/region_mean": 0.0012738552031805739, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3006.0, "completions/mean_length": 876.3638916015625, "completions/mean_terminated_length": 569.3569946289062, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 7.867638483965014, "grad_norm": 0.2514052093029022, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 475283549.0, "reward": 0.6160714626312256, "reward_std": 0.1576707363128662, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 842 }, { "clip_ratio/high_max": 0.0016686324415786657, "clip_ratio/high_mean": 0.0005509141410584562, "clip_ratio/low_mean": 0.00042700323547251173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009779173833521781, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 1800.0, "completions/mean_length": 800.6551513671875, "completions/mean_terminated_length": 490.8363952636719, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 7.876967930029155, "grad_norm": 0.28914740681648254, "learning_rate": 1e-06, "loss": -0.0257, "num_tokens": 475774712.0, "reward": 0.6484375, "reward_std": 0.1236218810081482, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 843 }, { "clip_ratio/high_max": 0.0016933851802605204, "clip_ratio/high_mean": 0.0006025929124007234, "clip_ratio/low_mean": 0.0005692411987183732, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001171834101114655, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1984.0, "completions/mean_length": 760.5558471679688, "completions/mean_terminated_length": 495.3277282714844, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 7.886297376093294, "grad_norm": 0.2057284116744995, "learning_rate": 1e-06, "loss": -0.0296, "num_tokens": 476268986.0, "reward": 0.6908482313156128, "reward_std": 0.11768680810928345, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 844 }, { "clip_ratio/high_max": 0.0019498175824992359, "clip_ratio/high_mean": 0.0007854042469261913, "clip_ratio/low_mean": 0.0005245197789918166, "clip_ratio/low_min": 1.3676149137609173e-05, "clip_ratio/region_mean": 0.0013099240204610396, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2312.0, "completions/mean_length": 774.622802734375, "completions/mean_terminated_length": 536.2463989257812, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 7.895626822157435, "grad_norm": 0.2517111897468567, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 476814640.0, "reward": 0.6339285969734192, "reward_std": 0.17307278513908386, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199838399887085, "step": 845 }, { "clip_ratio/high_max": 0.0018377655778749613, "clip_ratio/high_mean": 0.0006429817076423205, "clip_ratio/low_mean": 0.00043769156400230713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010806732534547336, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2761.0, "completions/mean_length": 818.9564819335938, "completions/mean_terminated_length": 519.5919799804688, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 7.904956268221574, "grad_norm": 0.2622203826904297, "learning_rate": 1e-06, "loss": -0.0288, "num_tokens": 477337889.0, "reward": 0.660714328289032, "reward_std": 0.13121342658996582, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 846 }, { "clip_ratio/high_max": 0.0020022262178827077, "clip_ratio/high_mean": 0.0007785756806697464, "clip_ratio/low_mean": 0.0007115808166417992, "clip_ratio/low_min": 3.995268889411818e-05, "clip_ratio/region_mean": 0.0014901565009495243, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3312.0, "completions/mean_length": 829.1027221679688, "completions/mean_terminated_length": 543.6456298828125, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 7.914285714285715, "grad_norm": 0.4514482021331787, "learning_rate": 1e-06, "loss": -0.023, "num_tokens": 477886685.0, "reward": 0.6551339626312256, "reward_std": 0.17731650173664093, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 847 }, { "clip_ratio/high_max": 0.002184198223403655, "clip_ratio/high_mean": 0.0009001014686873532, "clip_ratio/low_mean": 0.00044511800069813034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013452194798446726, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3173.0, "completions/mean_length": 863.5714721679688, "completions/mean_terminated_length": 551.0110473632812, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 7.923615160349854, "grad_norm": 0.5183972120285034, "learning_rate": 1e-06, "loss": -0.0683, "num_tokens": 478428389.0, "reward": 0.6283482313156128, "reward_std": 0.15675964951515198, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 848 }, { "clip_ratio/high_max": 0.002004287212912459, "clip_ratio/high_mean": 0.0007069515486364253, "clip_ratio/low_mean": 0.0003856417215502006, "clip_ratio/low_min": 2.179218972742092e-05, "clip_ratio/region_mean": 0.0010925932874670252, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3662.0, "completions/mean_length": 847.6585083007812, "completions/mean_terminated_length": 542.2588500976562, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 7.932944606413994, "grad_norm": 0.19054283201694489, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 478963731.0, "reward": 0.6272321939468384, "reward_std": 0.1347392499446869, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 849 }, { "clip_ratio/high_max": 0.002129192725988105, "clip_ratio/high_mean": 0.0007958323694765568, "clip_ratio/low_mean": 0.0004401272362883901, "clip_ratio/low_min": 1.14009490062017e-05, "clip_ratio/region_mean": 0.0012359595966699999, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3765.0, "completions/mean_length": 967.4263916015625, "completions/mean_terminated_length": 569.959716796875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 7.942274052478134, "grad_norm": 0.35868701338768005, "learning_rate": 1e-06, "loss": -0.0567, "num_tokens": 479511625.0, "reward": 0.613839328289032, "reward_std": 0.17479778826236725, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 850 }, { "clip_ratio/high_max": 0.0016355398001905996, "clip_ratio/high_mean": 0.0005937251335126348, "clip_ratio/low_mean": 0.00045768182644678745, "clip_ratio/low_min": 1.6322799638146535e-05, "clip_ratio/region_mean": 0.0010514069799683057, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3470.0, "completions/mean_length": 852.9609985351562, "completions/mean_terminated_length": 565.3037719726562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 7.9516034985422746, "grad_norm": 0.2254210114479065, "learning_rate": 1e-06, "loss": -0.0368, "num_tokens": 480063022.0, "reward": 0.6238839626312256, "reward_std": 0.15041157603263855, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 851 }, { "clip_ratio/high_max": 0.0021271073710522614, "clip_ratio/high_mean": 0.0008197276983992197, "clip_ratio/low_mean": 0.0005710192399419611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013907469765399583, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3977.0, "completions/mean_length": 958.8170166015625, "completions/mean_terminated_length": 582.35498046875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 7.960932944606414, "grad_norm": 0.31778720021247864, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 480617722.0, "reward": 0.5993303656578064, "reward_std": 0.15417630970478058, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 852 }, { "clip_ratio/high_max": 0.0018951149613712914, "clip_ratio/high_mean": 0.0008180319509847322, "clip_ratio/low_mean": 0.0005928526206844253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014108845680311788, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2612.0, "completions/mean_length": 859.3158569335938, "completions/mean_terminated_length": 555.01220703125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 7.970262390670554, "grad_norm": 0.2911638915538788, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 481153149.0, "reward": 0.6752232313156128, "reward_std": 0.17836636304855347, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 853 }, { "clip_ratio/high_max": 0.0017786150056053884, "clip_ratio/high_mean": 0.0006056256024749018, "clip_ratio/low_mean": 0.00037553645870502805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009811620620894246, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 867.638427734375, "completions/mean_terminated_length": 529.2774047851562, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 7.979591836734694, "grad_norm": 0.20771653950214386, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 481667841.0, "reward": 0.6852678656578064, "reward_std": 0.1194855347275734, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 854 }, { "clip_ratio/high_max": 0.0018286980030097766, "clip_ratio/high_mean": 0.000561557541914226, "clip_ratio/low_mean": 0.0004962916764270631, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010578492365311831, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3810.0, "completions/mean_length": 872.4342041015625, "completions/mean_terminated_length": 552.0552368164062, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 7.988921282798834, "grad_norm": 0.21716172993183136, "learning_rate": 1e-06, "loss": -0.0479, "num_tokens": 482218878.0, "reward": 0.6026785969734192, "reward_std": 0.13639894127845764, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 855 }, { "clip_ratio/high_max": 0.0015286402122001164, "clip_ratio/high_mean": 0.0005184036726859631, "clip_ratio/low_mean": 0.00045618433841809747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009745880397531437, "completions/clipped_ratio": 0.09659090909090906, "completions/max_length": 4096.0, "completions/max_terminated_length": 2015.0, "completions/mean_length": 954.9716186523438, "completions/mean_terminated_length": 619.1383666992188, "completions/min_length": 197.0, "completions/min_terminated_length": 197.0, "epoch": 7.998250728862974, "grad_norm": 0.6963316798210144, "learning_rate": 1e-06, "loss": -0.0157, "num_tokens": 482733549.0, "reward": 0.6707589626312256, "reward_std": 0.1159183457493782, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 856 }, { "clip_ratio/high_max": 0.0017244667797058355, "clip_ratio/high_mean": 0.0005943860869592754, "clip_ratio/low_mean": 0.00042664310967666097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010210292020929046, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2205.0, "completions/mean_length": 869.1629638671875, "completions/mean_terminated_length": 561.469482421875, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 8.00932944606414, "grad_norm": 15.352341651916504, "learning_rate": 1e-06, "loss": -0.0328, "num_tokens": 483287247.0, "reward": 0.629464328289032, "reward_std": 0.13714709877967834, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 857 }, { "clip_ratio/high_max": 0.001815902487578569, "clip_ratio/high_mean": 0.000629084113825229, "clip_ratio/low_mean": 0.0005884908350708429, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012175749398011249, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2103.0, "completions/mean_length": 833.4766235351562, "completions/mean_terminated_length": 526.7435913085938, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 8.018658892128279, "grad_norm": 0.33955374360084534, "learning_rate": 1e-06, "loss": -0.0171, "num_tokens": 483814506.0, "reward": 0.699776828289032, "reward_std": 0.13583439588546753, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 858 }, { "clip_ratio/high_max": 0.0015145917486734106, "clip_ratio/high_mean": 0.0005256214504925083, "clip_ratio/low_mean": 0.0002786210413887602, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008042424833547557, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 957.14404296875, "completions/mean_terminated_length": 562.8153076171875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 8.02798833819242, "grad_norm": 0.2032623589038849, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 484347099.0, "reward": 0.625, "reward_std": 0.09796436131000519, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 859 }, { "clip_ratio/high_max": 0.001714057958452031, "clip_ratio/high_mean": 0.000666359880597156, "clip_ratio/low_mean": 0.00044313512262306176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011094949950347655, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 909.4699096679688, "completions/mean_terminated_length": 605.6198120117188, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 8.03731778425656, "grad_norm": 0.2106010764837265, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 484923064.0, "reward": 0.6462053656578064, "reward_std": 0.1527452915906906, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 860 }, { "clip_ratio/high_max": 0.0017888393049361184, "clip_ratio/high_mean": 0.00051904266456404, "clip_ratio/low_mean": 0.00037209191259535146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00089113457397616, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3026.0, "completions/mean_length": 814.114990234375, "completions/mean_terminated_length": 514.3081665039062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.0466472303207, "grad_norm": 0.22404490411281586, "learning_rate": 1e-06, "loss": -0.0477, "num_tokens": 485430359.0, "reward": 0.6741071939468384, "reward_std": 0.11652494221925735, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692258834839, "step": 861 }, { "clip_ratio/high_max": 0.0017333504147245549, "clip_ratio/high_mean": 0.0006453966961998958, "clip_ratio/low_mean": 0.00027636869162961375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009217654042004142, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4010.0, "completions/mean_length": 991.5469360351562, "completions/mean_terminated_length": 534.4251098632812, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.055976676384839, "grad_norm": 0.2290908694267273, "learning_rate": 1e-06, "loss": -0.058, "num_tokens": 485929177.0, "reward": 0.684151828289032, "reward_std": 0.1170462965965271, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 862 }, { "clip_ratio/high_max": 0.001612779811694054, "clip_ratio/high_mean": 0.0006354074148475775, "clip_ratio/low_mean": 0.0003885918279138423, "clip_ratio/low_min": 1.467480615247041e-05, "clip_ratio/region_mean": 0.0010239992552669719, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3949.0, "completions/mean_length": 916.3136596679688, "completions/mean_terminated_length": 548.0560302734375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 8.06530612244898, "grad_norm": 0.21813301742076874, "learning_rate": 1e-06, "loss": -0.0322, "num_tokens": 486473602.0, "reward": 0.6227678656578064, "reward_std": 0.1363978087902069, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 863 }, { "clip_ratio/high_max": 0.0016862124830367975, "clip_ratio/high_mean": 0.0006248824356589466, "clip_ratio/low_mean": 0.0002908805540755566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009157629938272294, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3890.0, "completions/mean_length": 887.0569458007812, "completions/mean_terminated_length": 559.45263671875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.07463556851312, "grad_norm": 0.41344738006591797, "learning_rate": 1e-06, "loss": -0.0673, "num_tokens": 487014325.0, "reward": 0.6629464626312256, "reward_std": 0.12648296356201172, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 864 }, { "clip_ratio/high_max": 0.0019127090854453854, "clip_ratio/high_mean": 0.0006246632092370419, "clip_ratio/low_mean": 0.0006766343158233212, "clip_ratio/low_min": 1.775568125594873e-05, "clip_ratio/region_mean": 0.0013012975341553101, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2258.0, "completions/mean_length": 799.7299194335938, "completions/mean_terminated_length": 516.0509033203125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 8.08396501457726, "grad_norm": 0.24503915011882782, "learning_rate": 1e-06, "loss": -0.019, "num_tokens": 487537491.0, "reward": 0.6629464626312256, "reward_std": 0.13185282051563263, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 865 }, { "clip_ratio/high_max": 0.0016169263690244406, "clip_ratio/high_mean": 0.0005210784593145945, "clip_ratio/low_mean": 0.0003487113672235864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008697898374521174, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3092.0, "completions/mean_length": 775.1886596679688, "completions/mean_terminated_length": 515.4380493164062, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 8.093294460641399, "grad_norm": 0.2012886106967926, "learning_rate": 1e-06, "loss": -0.0259, "num_tokens": 488051460.0, "reward": 0.6930803656578064, "reward_std": 0.1169707253575325, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 866 }, { "clip_ratio/high_max": 0.0017281859545619227, "clip_ratio/high_mean": 0.000541291481567896, "clip_ratio/low_mean": 0.0003903100564457418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009316015712101944, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 1076.1295166015625, "completions/mean_terminated_length": 586.526611328125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 8.102623906705539, "grad_norm": 0.2492036670446396, "learning_rate": 1e-06, "loss": -0.0466, "num_tokens": 488584376.0, "reward": 0.59375, "reward_std": 0.13981598615646362, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 867 }, { "clip_ratio/high_max": 0.0022218978810997214, "clip_ratio/high_mean": 0.0008337921426573303, "clip_ratio/low_mean": 0.0004655011571230716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012992933225177694, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 893.5781860351562, "completions/mean_terminated_length": 549.1890869140625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.11195335276968, "grad_norm": 238.7201690673828, "learning_rate": 1e-06, "loss": -0.0326, "num_tokens": 489126470.0, "reward": 0.6618303656578064, "reward_std": 0.1555984914302826, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 868 }, { "clip_ratio/high_max": 0.0020096220141567755, "clip_ratio/high_mean": 0.0007902960333012743, "clip_ratio/low_mean": 0.0004401411270009703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012304371739446651, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3103.0, "completions/mean_length": 847.7745971679688, "completions/mean_terminated_length": 538.0415649414062, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 8.12128279883382, "grad_norm": 0.2761950194835663, "learning_rate": 1e-06, "loss": -0.0462, "num_tokens": 489648748.0, "reward": 0.6484375, "reward_std": 0.1641664206981659, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 869 }, { "clip_ratio/high_max": 0.0014475346979452297, "clip_ratio/high_mean": 0.000513871299517632, "clip_ratio/low_mean": 0.00033935469491552794, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008532259871572023, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 939.6763916015625, "completions/mean_terminated_length": 556.4931030273438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 8.130612244897959, "grad_norm": 0.3890567719936371, "learning_rate": 1e-06, "loss": -0.0449, "num_tokens": 490178770.0, "reward": 0.606026828289032, "reward_std": 0.11276091635227203, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 870 }, { "clip_ratio/high_max": 0.0016950298231677152, "clip_ratio/high_mean": 0.0005651328647218179, "clip_ratio/low_mean": 0.0004706784930021968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010358113868278451, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 843.036865234375, "completions/mean_terminated_length": 550.1909790039062, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 8.139941690962099, "grad_norm": 0.23404917120933533, "learning_rate": 1e-06, "loss": -0.007, "num_tokens": 490726339.0, "reward": 0.684151828289032, "reward_std": 0.12982404232025146, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 871 }, { "clip_ratio/high_max": 0.0016453908865514677, "clip_ratio/high_mean": 0.0006381078073900426, "clip_ratio/low_mean": 0.00038285072605503956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010209585325355874, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3089.0, "completions/mean_length": 906.99560546875, "completions/mean_terminated_length": 537.6587524414062, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 8.14927113702624, "grad_norm": 0.2166660577058792, "learning_rate": 1e-06, "loss": -0.0469, "num_tokens": 491254479.0, "reward": 0.6495535969734192, "reward_std": 0.14030200242996216, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 872 }, { "clip_ratio/high_max": 0.001982413032237673, "clip_ratio/high_mean": 0.0006984776791796321, "clip_ratio/low_mean": 0.0003224920515094709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010209697193204192, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 839.3013916015625, "completions/mean_terminated_length": 537.4609985351562, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 8.15860058309038, "grad_norm": 0.2352529764175415, "learning_rate": 1e-06, "loss": -0.0565, "num_tokens": 491787909.0, "reward": 0.7276785969734192, "reward_std": 0.14609482884407043, "rewards/verify_math_reward/mean": 0.7276785969734192, "rewards/verify_math_reward/std": 0.4454030692577362, "step": 873 }, { "clip_ratio/high_max": 0.00204184370522853, "clip_ratio/high_mean": 0.000725960026102257, "clip_ratio/low_mean": 0.0004920503793073294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001218010402226355, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3599.0, "completions/mean_length": 969.3047485351562, "completions/mean_terminated_length": 589.7183837890625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 8.167930029154519, "grad_norm": 0.24664461612701416, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 492351654.0, "reward": 0.6316964626312256, "reward_std": 0.16499200463294983, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 874 }, { "clip_ratio/high_max": 0.0021251223297440447, "clip_ratio/high_mean": 0.000931073915126035, "clip_ratio/low_mean": 0.0005393568981162389, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014704307832289487, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3360.0, "completions/mean_length": 938.8035888671875, "completions/mean_terminated_length": 551.0776977539062, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 8.177259475218658, "grad_norm": 0.2628461420536041, "learning_rate": 1e-06, "loss": -0.0787, "num_tokens": 492871910.0, "reward": 0.637276828289032, "reward_std": 0.1826542466878891, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 875 }, { "clip_ratio/high_max": 0.0018862502183765173, "clip_ratio/high_mean": 0.0006135445682957652, "clip_ratio/low_mean": 0.0003818796196810581, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000995424214124796, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 826.864990234375, "completions/mean_terminated_length": 493.1156005859375, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 8.186588921282798, "grad_norm": 0.25860628485679626, "learning_rate": 1e-06, "loss": -0.0254, "num_tokens": 493365341.0, "reward": 0.6863839626312256, "reward_std": 0.1401950567960739, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 876 }, { "clip_ratio/high_max": 0.002142851193639217, "clip_ratio/high_mean": 0.0007946861342134071, "clip_ratio/low_mean": 0.00040247954029837274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011971656967944, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3883.0, "completions/mean_length": 815.7678833007812, "completions/mean_terminated_length": 524.8117065429688, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 8.19591836734694, "grad_norm": 0.20338168740272522, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 493896981.0, "reward": 0.6975446939468384, "reward_std": 0.138957217335701, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 877 }, { "clip_ratio/high_max": 0.0017884866683743894, "clip_ratio/high_mean": 0.0006116581653259345, "clip_ratio/low_mean": 0.0003461850456005777, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009578432036505546, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 999.6339721679688, "completions/mean_terminated_length": 548.2455444335938, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 8.205247813411079, "grad_norm": 0.2069510817527771, "learning_rate": 1e-06, "loss": -0.0338, "num_tokens": 494419029.0, "reward": 0.637276828289032, "reward_std": 0.11761011183261871, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 878 }, { "clip_ratio/high_max": 0.0015876740035309922, "clip_ratio/high_mean": 0.0005098295314382995, "clip_ratio/low_mean": 0.00030483110958812176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008146606378431898, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3362.0, "completions/mean_length": 887.3370971679688, "completions/mean_terminated_length": 551.0406494140625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 8.214577259475218, "grad_norm": 0.2441258728504181, "learning_rate": 1e-06, "loss": -0.0145, "num_tokens": 494958299.0, "reward": 0.6595982313156128, "reward_std": 0.09296473115682602, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 879 }, { "clip_ratio/high_max": 0.0018043315503746271, "clip_ratio/high_mean": 0.0005977879645797657, "clip_ratio/low_mean": 0.0004473864946703543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001045174452883657, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 1023.6138916015625, "completions/mean_terminated_length": 606.953125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 8.223906705539358, "grad_norm": 0.22276468575000763, "learning_rate": 1e-06, "loss": -0.035, "num_tokens": 495526337.0, "reward": 0.609375, "reward_std": 0.13872899115085602, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 880 }, { "clip_ratio/high_max": 0.002095451029163087, "clip_ratio/high_mean": 0.0007548647981820977, "clip_ratio/low_mean": 0.0005120580945003894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012669228817685507, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2106.0, "completions/mean_length": 769.6663208007812, "completions/mean_terminated_length": 522.3848876953125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 8.2332361516035, "grad_norm": 0.5053399801254272, "learning_rate": 1e-06, "loss": -0.0318, "num_tokens": 496057430.0, "reward": 0.6696428656578064, "reward_std": 0.15033671259880066, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 881 }, { "clip_ratio/high_max": 0.0021112427639309317, "clip_ratio/high_mean": 0.0008087911592156161, "clip_ratio/low_mean": 0.00037163476304158394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011804258938354906, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3899.0, "completions/mean_length": 937.7913208007812, "completions/mean_terminated_length": 541.0313720703125, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 8.242565597667639, "grad_norm": 0.22142240405082703, "learning_rate": 1e-06, "loss": -0.0599, "num_tokens": 496591603.0, "reward": 0.6316964626312256, "reward_std": 0.1648743599653244, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 882 }, { "clip_ratio/high_max": 0.0015066460800881032, "clip_ratio/high_mean": 0.0005286981004246627, "clip_ratio/low_mean": 0.0001926557949900598, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007213538719952339, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3166.0, "completions/mean_length": 944.7020263671875, "completions/mean_terminated_length": 544.348388671875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 8.251895043731778, "grad_norm": 0.17582453787326813, "learning_rate": 1e-06, "loss": -0.03, "num_tokens": 497114072.0, "reward": 0.6071428656578064, "reward_std": 0.09622910618782043, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865824937820435, "step": 883 }, { "clip_ratio/high_max": 0.0020494664859143086, "clip_ratio/high_mean": 0.0006650339419138618, "clip_ratio/low_mean": 0.00041972394319600426, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010847578341781627, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3307.0, "completions/mean_length": 872.8348388671875, "completions/mean_terminated_length": 556.8382568359375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 8.261224489795918, "grad_norm": 0.24951384961605072, "learning_rate": 1e-06, "loss": -0.0565, "num_tokens": 497651252.0, "reward": 0.6495535969734192, "reward_std": 0.1560874581336975, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 884 }, { "clip_ratio/high_max": 0.0019271996352472343, "clip_ratio/high_mean": 0.0007097693446667108, "clip_ratio/low_mean": 0.0005364949629438343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012462643389881123, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3198.0, "completions/mean_length": 932.59716796875, "completions/mean_terminated_length": 535.1846923828125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 8.270553935860057, "grad_norm": 0.4013597369194031, "learning_rate": 1e-06, "loss": -0.0687, "num_tokens": 498174699.0, "reward": 0.6495535969734192, "reward_std": 0.1720215380191803, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 885 }, { "clip_ratio/high_max": 0.0023901965796540026, "clip_ratio/high_mean": 0.0008991915856313426, "clip_ratio/low_mean": 0.00040031771254689374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012995092693017796, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3876.0, "completions/mean_length": 1019.1406860351562, "completions/mean_terminated_length": 566.0819702148438, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 8.279883381924199, "grad_norm": 0.23164652287960052, "learning_rate": 1e-06, "loss": -0.0341, "num_tokens": 498707593.0, "reward": 0.6439732313156128, "reward_std": 0.1468448042869568, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 886 }, { "clip_ratio/high_max": 0.0022972855367697775, "clip_ratio/high_mean": 0.0008382612049899762, "clip_ratio/low_mean": 0.0004574144759317278, "clip_ratio/low_min": 3.371089405845851e-05, "clip_ratio/region_mean": 0.0012956756545463577, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2977.0, "completions/mean_length": 838.8549194335938, "completions/mean_terminated_length": 536.97314453125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.289212827988338, "grad_norm": 0.28572994470596313, "learning_rate": 1e-06, "loss": -0.0696, "num_tokens": 499232447.0, "reward": 0.6975446939468384, "reward_std": 0.16679435968399048, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957791805267334, "step": 887 }, { "clip_ratio/high_max": 0.0016594358821748756, "clip_ratio/high_mean": 0.000605644958341145, "clip_ratio/low_mean": 0.0003100996875673445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009157446475001052, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 914.5491333007812, "completions/mean_terminated_length": 550.5025024414062, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 8.298542274052478, "grad_norm": 0.20085084438323975, "learning_rate": 1e-06, "loss": -0.0582, "num_tokens": 499765051.0, "reward": 0.6495535969734192, "reward_std": 0.12414927780628204, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 888 }, { "clip_ratio/high_max": 0.0019504541869537206, "clip_ratio/high_mean": 0.0006448357589761144, "clip_ratio/low_mean": 0.00034611516866789316, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009909509235512814, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2123.0, "completions/mean_length": 894.7344360351562, "completions/mean_terminated_length": 515.0586547851562, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.307871720116617, "grad_norm": 0.20388734340667725, "learning_rate": 1e-06, "loss": -0.057, "num_tokens": 500264101.0, "reward": 0.7064732313156128, "reward_std": 0.1365479677915573, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 889 }, { "clip_ratio/high_max": 0.0018714383641054155, "clip_ratio/high_mean": 0.0006585944420294254, "clip_ratio/low_mean": 0.0004814585936401272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011400530456739943, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 944.4051513671875, "completions/mean_terminated_length": 548.4761352539062, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 8.317201166180759, "grad_norm": 0.22173303365707397, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 500794280.0, "reward": 0.660714328289032, "reward_std": 0.11419377475976944, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 890 }, { "clip_ratio/high_max": 0.0020545174193102866, "clip_ratio/high_mean": 0.0006875173803564394, "clip_ratio/low_mean": 0.0004447611241857885, "clip_ratio/low_min": 9.802383829082828e-06, "clip_ratio/region_mean": 0.0011322784885123838, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 1030.0648193359375, "completions/mean_terminated_length": 569.5841064453125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 8.326530612244898, "grad_norm": 0.2118057757616043, "learning_rate": 1e-06, "loss": -0.0397, "num_tokens": 501333930.0, "reward": 0.629464328289032, "reward_std": 0.14004239439964294, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 891 }, { "clip_ratio/high_max": 0.0016472196075483225, "clip_ratio/high_mean": 0.0005582850362770841, "clip_ratio/low_mean": 0.00046393736238314887, "clip_ratio/low_min": 1.8021914002019912e-05, "clip_ratio/region_mean": 0.0010222224009339698, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3394.0, "completions/mean_length": 864.5424194335938, "completions/mean_terminated_length": 525.85693359375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.335860058309038, "grad_norm": 0.22242428362369537, "learning_rate": 1e-06, "loss": -0.0211, "num_tokens": 501848384.0, "reward": 0.6026785969734192, "reward_std": 0.1345216929912567, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 892 }, { "clip_ratio/high_max": 0.001975229210074758, "clip_ratio/high_mean": 0.0006894999714859296, "clip_ratio/low_mean": 0.0004100064293197647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010995063894370105, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2552.0, "completions/mean_length": 858.8270263671875, "completions/mean_terminated_length": 550.14794921875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.345189504373177, "grad_norm": 0.25533241033554077, "learning_rate": 1e-06, "loss": -0.0423, "num_tokens": 502379541.0, "reward": 0.6618303656578064, "reward_std": 0.13947898149490356, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 893 }, { "clip_ratio/high_max": 0.0020232624028722057, "clip_ratio/high_mean": 0.0006215312896529213, "clip_ratio/low_mean": 0.0004123197107901433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010338510055589722, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3723.0, "completions/mean_length": 851.575927734375, "completions/mean_terminated_length": 515.94580078125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 8.354518950437317, "grad_norm": 0.21104665100574493, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 502891985.0, "reward": 0.621651828289032, "reward_std": 0.12482510507106781, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 894 }, { "clip_ratio/high_max": 0.002325372952327598, "clip_ratio/high_mean": 0.0008392198560613906, "clip_ratio/low_mean": 0.00047660489872214384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013158247311366722, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3856.0, "completions/mean_length": 978.0256958007812, "completions/mean_terminated_length": 590.7239379882812, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.363848396501458, "grad_norm": 0.22268977761268616, "learning_rate": 1e-06, "loss": -0.0614, "num_tokens": 503446264.0, "reward": 0.6160714626312256, "reward_std": 0.18321487307548523, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 895 }, { "clip_ratio/high_max": 0.0014690818279632367, "clip_ratio/high_mean": 0.000583587230721605, "clip_ratio/low_mean": 0.0002704414960135182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008540287126379553, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2168.0, "completions/mean_length": 898.5011596679688, "completions/mean_terminated_length": 550.2586669921875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 8.373177842565598, "grad_norm": 0.19972148537635803, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 503974257.0, "reward": 0.6875000596046448, "reward_std": 0.11208761483430862, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 896 }, { "clip_ratio/high_max": 0.002112543603288941, "clip_ratio/high_mean": 0.0007068553941280697, "clip_ratio/low_mean": 0.00047607089641132916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001182926273031626, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2223.0, "completions/mean_length": 834.950927734375, "completions/mean_terminated_length": 488.716064453125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.382507288629737, "grad_norm": 0.21627932786941528, "learning_rate": 1e-06, "loss": -0.0355, "num_tokens": 504470837.0, "reward": 0.6819196939468384, "reward_std": 0.14861243963241577, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 897 }, { "clip_ratio/high_max": 0.001663959090365097, "clip_ratio/high_mean": 0.0005463961751956958, "clip_ratio/low_mean": 0.0003685870410663483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009149832330876961, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4071.0, "completions/mean_length": 970.8035888671875, "completions/mean_terminated_length": 546.98095703125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 8.391836734693877, "grad_norm": 0.21270903944969177, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 504979421.0, "reward": 0.684151828289032, "reward_std": 0.12294787913560867, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 898 }, { "clip_ratio/high_max": 0.002173752825910924, "clip_ratio/high_mean": 0.0009471107168792514, "clip_ratio/low_mean": 0.0005112791423016461, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014583898519049399, "completions/clipped_ratio": 0.078125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 794.5469360351562, "completions/mean_terminated_length": 514.7627563476562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.401166180758018, "grad_norm": 0.2854195237159729, "learning_rate": 1e-06, "loss": -0.0351, "num_tokens": 505496719.0, "reward": 0.6830357313156128, "reward_std": 0.15169471502304077, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 899 }, { "clip_ratio/high_max": 0.002184569697419647, "clip_ratio/high_mean": 0.0009239231985702645, "clip_ratio/low_mean": 0.0003347971370430969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012587203418661375, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1900.0, "completions/mean_length": 750.4832763671875, "completions/mean_terminated_length": 527.4488525390625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 8.410495626822158, "grad_norm": 0.2775871157646179, "learning_rate": 1e-06, "loss": -0.0097, "num_tokens": 506020176.0, "reward": 0.723214328289032, "reward_std": 0.1670200526714325, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 900 }, { "clip_ratio/high_max": 0.002224650677817408, "clip_ratio/high_mean": 0.0008865884065016871, "clip_ratio/low_mean": 0.0003944913078157697, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012810797088604886, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3087.0, "completions/mean_length": 878.513427734375, "completions/mean_terminated_length": 545.669921875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.419825072886297, "grad_norm": 2.284661293029785, "learning_rate": 1e-06, "loss": -0.0575, "num_tokens": 506546380.0, "reward": 0.6718750596046448, "reward_std": 0.1646900177001953, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 901 }, { "clip_ratio/high_max": 0.001945417607203126, "clip_ratio/high_mean": 0.0006544553716594237, "clip_ratio/low_mean": 0.0003661346249828057, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010205899889115244, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 842.9967041015625, "completions/mean_terminated_length": 502.0530090332031, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 8.429154518950437, "grad_norm": 0.21726374328136444, "learning_rate": 1e-06, "loss": -0.0618, "num_tokens": 507034673.0, "reward": 0.7287946939468384, "reward_std": 0.13752618432044983, "rewards/verify_math_reward/mean": 0.7287946343421936, "rewards/verify_math_reward/std": 0.44483017921447754, "step": 902 }, { "clip_ratio/high_max": 0.0020464137851377018, "clip_ratio/high_mean": 0.0007443920094374334, "clip_ratio/low_mean": 0.0003264422643951548, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010708342997531872, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3798.0, "completions/mean_length": 941.388427734375, "completions/mean_terminated_length": 531.6469116210938, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.438483965014576, "grad_norm": 0.5861804485321045, "learning_rate": 1e-06, "loss": -0.0497, "num_tokens": 507543701.0, "reward": 0.6741071939468384, "reward_std": 0.13854604959487915, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 903 }, { "clip_ratio/high_max": 0.0020555760729621397, "clip_ratio/high_mean": 0.0006632981621805811, "clip_ratio/low_mean": 0.0002628438535339228, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009261420582333812, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 945.7589721679688, "completions/mean_terminated_length": 580.9115600585938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 8.447813411078718, "grad_norm": 0.21933381259441376, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 508106973.0, "reward": 0.6395089626312256, "reward_std": 0.11888891458511353, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 904 }, { "clip_ratio/high_max": 0.002727092505665496, "clip_ratio/high_mean": 0.0008832131898088846, "clip_ratio/low_mean": 0.00044514592991617974, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013283591251820326, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2117.0, "completions/mean_length": 908.4420166015625, "completions/mean_terminated_length": 521.4668579101562, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.457142857142857, "grad_norm": 0.2372286021709442, "learning_rate": 1e-06, "loss": -0.0575, "num_tokens": 508619489.0, "reward": 0.6886160969734192, "reward_std": 0.13755826652050018, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 905 }, { "clip_ratio/high_max": 0.0016336179396603256, "clip_ratio/high_mean": 0.0005315077278282843, "clip_ratio/low_mean": 0.00032338370465367916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008548914356651949, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2829.0, "completions/mean_length": 890.54248046875, "completions/mean_terminated_length": 514.8403930664062, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 8.466472303206997, "grad_norm": 0.2048484832048416, "learning_rate": 1e-06, "loss": -0.028, "num_tokens": 509118311.0, "reward": 0.6975446939468384, "reward_std": 0.10788102447986603, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 906 }, { "clip_ratio/high_max": 0.0014587766454496887, "clip_ratio/high_mean": 0.0005412365135271102, "clip_ratio/low_mean": 0.0004826612894248683, "clip_ratio/low_min": 1.4945002476451918e-05, "clip_ratio/region_mean": 0.0010238978393317666, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4069.0, "completions/mean_length": 885.6641235351562, "completions/mean_terminated_length": 549.1923217773438, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.475801749271136, "grad_norm": 0.2174452394247055, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 509649154.0, "reward": 0.606026828289032, "reward_std": 0.1304224729537964, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 907 }, { "clip_ratio/high_max": 0.0020289599560783245, "clip_ratio/high_mean": 0.000711862368916627, "clip_ratio/low_mean": 0.00047021469345054356, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00118207704872475, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 1061.375, "completions/mean_terminated_length": 573.9481811523438, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 8.485131195335278, "grad_norm": 0.24835245311260223, "learning_rate": 1e-06, "loss": -0.0591, "num_tokens": 510175474.0, "reward": 0.6350446939468384, "reward_std": 0.14920946955680847, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 908 }, { "clip_ratio/high_max": 0.0018362417395110242, "clip_ratio/high_mean": 0.0006620414878852898, "clip_ratio/low_mean": 0.0003299538598184881, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009919953627104405, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2415.0, "completions/mean_length": 860.3170166015625, "completions/mean_terminated_length": 516.7753295898438, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.494460641399417, "grad_norm": 0.22667130827903748, "learning_rate": 1e-06, "loss": -0.0244, "num_tokens": 510680118.0, "reward": 0.660714328289032, "reward_std": 0.14849267899990082, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 909 }, { "clip_ratio/high_max": 0.0022997462438070215, "clip_ratio/high_mean": 0.0007791659063514089, "clip_ratio/low_mean": 0.0003796070168391452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011587728877202608, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2895.0, "completions/mean_length": 906.6897583007812, "completions/mean_terminated_length": 541.7437744140625, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 8.503790087463557, "grad_norm": 0.3134598731994629, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 511215744.0, "reward": 0.6305803656578064, "reward_std": 0.14091253280639648, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 910 }, { "clip_ratio/high_max": 0.0016868900565896183, "clip_ratio/high_mean": 0.0005501292662302149, "clip_ratio/low_mean": 0.00036749553237314103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000917624767680536, "completions/clipped_ratio": 0.0580357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2369.0, "completions/mean_length": 702.9408569335938, "completions/mean_terminated_length": 493.88983154296875, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.513119533527696, "grad_norm": 0.241309255361557, "learning_rate": 1e-06, "loss": -0.0348, "num_tokens": 511721443.0, "reward": 0.7287946939468384, "reward_std": 0.12366396188735962, "rewards/verify_math_reward/mean": 0.7287946343421936, "rewards/verify_math_reward/std": 0.44483017921447754, "step": 911 }, { "clip_ratio/high_max": 0.0015970728927641176, "clip_ratio/high_mean": 0.0004294130103517091, "clip_ratio/low_mean": 0.0005405297551988042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009699427773739444, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3722.0, "completions/mean_length": 985.3460083007812, "completions/mean_terminated_length": 567.9671020507812, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 8.522448979591836, "grad_norm": 0.1997060477733612, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 512264337.0, "reward": 0.5345982313156128, "reward_std": 0.11272881925106049, "rewards/verify_math_reward/mean": 0.5345982313156128, "rewards/verify_math_reward/std": 0.4990801215171814, "step": 912 }, { "clip_ratio/high_max": 0.0018318583206564654, "clip_ratio/high_mean": 0.0007385027111013187, "clip_ratio/low_mean": 0.0005207276090004598, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012592303100973368, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3824.0, "completions/mean_length": 937.3973388671875, "completions/mean_terminated_length": 562.781494140625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 8.531778425655977, "grad_norm": 0.24116960167884827, "learning_rate": 1e-06, "loss": -0.0569, "num_tokens": 512800413.0, "reward": 0.637276828289032, "reward_std": 0.15315966308116913, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 913 }, { "clip_ratio/high_max": 0.0018709901087277103, "clip_ratio/high_mean": 0.0006881550307298312, "clip_ratio/low_mean": 0.0003869075301281555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010750625551736448, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2539.0, "completions/mean_length": 855.3984985351562, "completions/mean_terminated_length": 480.0859069824219, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 8.541107871720117, "grad_norm": 0.22416174411773682, "learning_rate": 1e-06, "loss": -0.0634, "num_tokens": 513280186.0, "reward": 0.6908482313156128, "reward_std": 0.14774663746356964, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 914 }, { "clip_ratio/high_max": 0.0021866236202185974, "clip_ratio/high_mean": 0.0006154387301648967, "clip_ratio/low_mean": 0.000243381276504806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008588199998484924, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1616.0, "completions/mean_length": 781.6763916015625, "completions/mean_terminated_length": 505.14874267578125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 8.550437317784256, "grad_norm": 0.21724767982959747, "learning_rate": 1e-06, "loss": -0.0356, "num_tokens": 513786520.0, "reward": 0.7020089626312256, "reward_std": 0.11768428236246109, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 915 }, { "clip_ratio/high_max": 0.001643316831177799, "clip_ratio/high_mean": 0.0006447542600653833, "clip_ratio/low_mean": 0.0003459509134700056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009907052044582088, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2622.0, "completions/mean_length": 1011.30810546875, "completions/mean_terminated_length": 601.835693359375, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 8.559766763848396, "grad_norm": 0.2337225377559662, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 514361588.0, "reward": 0.543526828289032, "reward_std": 0.1481991559267044, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 916 }, { "clip_ratio/high_max": 0.0019296219725219999, "clip_ratio/high_mean": 0.0006793836455472047, "clip_ratio/low_mean": 0.0003670189353215392, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001046402583597228, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2867.0, "completions/mean_length": 949.7176513671875, "completions/mean_terminated_length": 532.069580078125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.569096209912537, "grad_norm": 0.23076514899730682, "learning_rate": 1e-06, "loss": -0.0488, "num_tokens": 514870863.0, "reward": 0.6205357313156128, "reward_std": 0.13429418206214905, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 917 }, { "clip_ratio/high_max": 0.0016397672079619952, "clip_ratio/high_mean": 0.0005472286702570273, "clip_ratio/low_mean": 0.000468710894438118, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010159395496884827, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2430.0, "completions/mean_length": 926.8660888671875, "completions/mean_terminated_length": 537.6741943359375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 8.578425655976677, "grad_norm": 0.21286757290363312, "learning_rate": 1e-06, "loss": -0.0291, "num_tokens": 515393903.0, "reward": 0.6194196939468384, "reward_std": 0.1350095570087433, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 918 }, { "clip_ratio/high_max": 0.0015756159627926536, "clip_ratio/high_mean": 0.000557405535801081, "clip_ratio/low_mean": 0.0005766614485764876, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011340670062054414, "completions/clipped_ratio": 0.0669642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2044.0, "completions/mean_length": 742.7142944335938, "completions/mean_terminated_length": 502.0478210449219, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 8.587755102040816, "grad_norm": 6717.05322265625, "learning_rate": 1e-06, "loss": 0.2463, "num_tokens": 515909135.0, "reward": 0.7087053656578064, "reward_std": 0.15733122825622559, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 919 }, { "clip_ratio/high_max": 0.002001937074965099, "clip_ratio/high_mean": 0.0007161392595662619, "clip_ratio/low_mean": 0.000455452853202587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011715921100403648, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3228.0, "completions/mean_length": 972.87060546875, "completions/mean_terminated_length": 571.6624755859375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 8.597084548104956, "grad_norm": 0.21933098137378693, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 516445763.0, "reward": 0.6194196939468384, "reward_std": 0.16037102043628693, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 920 }, { "clip_ratio/high_max": 0.0021662593208020553, "clip_ratio/high_mean": 0.0007581191330245929, "clip_ratio/low_mean": 0.0004433449021234992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012014640124107245, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3997.0, "completions/mean_length": 962.029052734375, "completions/mean_terminated_length": 509.74456787109375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 8.606413994169095, "grad_norm": 0.23349839448928833, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 516927573.0, "reward": 0.668526828289032, "reward_std": 0.1288815438747406, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 921 }, { "clip_ratio/high_max": 0.001967736450751545, "clip_ratio/high_mean": 0.0006042885925126029, "clip_ratio/low_mean": 0.0004970809841324808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011013695402652957, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2446.0, "completions/mean_length": 787.2064819335938, "completions/mean_terminated_length": 566.6202392578125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 8.615743440233237, "grad_norm": 0.26429736614227295, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 517487646.0, "reward": 0.6316964626312256, "reward_std": 0.14624707400798798, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 922 }, { "clip_ratio/high_max": 0.0020967237251170445, "clip_ratio/high_mean": 0.0007870002773415763, "clip_ratio/low_mean": 0.0003176267155140522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011046270083170384, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3753.0, "completions/mean_length": 974.7824096679688, "completions/mean_terminated_length": 528.8941040039062, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 8.625072886297376, "grad_norm": 0.23378008604049683, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 517998483.0, "reward": 0.6674107313156128, "reward_std": 0.13219164311885834, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 923 }, { "clip_ratio/high_max": 0.0022776820114813745, "clip_ratio/high_mean": 0.0009351541120850015, "clip_ratio/low_mean": 0.0004110058025617036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013461599010042846, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2214.0, "completions/mean_length": 871.1205444335938, "completions/mean_terminated_length": 497.6288757324219, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 8.634402332361516, "grad_norm": 0.2799464166164398, "learning_rate": 1e-06, "loss": -0.0368, "num_tokens": 518490399.0, "reward": 0.6551339626312256, "reward_std": 0.15030533075332642, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 924 }, { "clip_ratio/high_max": 0.001753624834236689, "clip_ratio/high_mean": 0.0005756567447861016, "clip_ratio/low_mean": 0.0005050633108112379, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010807200560520869, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 943.169677734375, "completions/mean_terminated_length": 542.6213989257812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 8.643731778425655, "grad_norm": 0.31731176376342773, "learning_rate": 1e-06, "loss": -0.0348, "num_tokens": 519013759.0, "reward": 0.6272321939468384, "reward_std": 0.14522789418697357, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 925 }, { "clip_ratio/high_max": 0.0019128205167362466, "clip_ratio/high_mean": 0.0006273483968470828, "clip_ratio/low_mean": 0.000485394918541715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011127433135698084, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 910.247802734375, "completions/mean_terminated_length": 523.4918823242188, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 8.653061224489797, "grad_norm": 0.2529394030570984, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 519519909.0, "reward": 0.6551339626312256, "reward_std": 0.15259189903736115, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 926 }, { "clip_ratio/high_max": 0.001869158870249521, "clip_ratio/high_mean": 0.0006110826516305679, "clip_ratio/low_mean": 0.0002701722187339328, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008812548621790484, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2848.0, "completions/mean_length": 936.6428833007812, "completions/mean_terminated_length": 508.18756103515625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 8.662390670553936, "grad_norm": 0.20568110048770905, "learning_rate": 1e-06, "loss": -0.0268, "num_tokens": 520012085.0, "reward": 0.621651828289032, "reward_std": 0.11314070224761963, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 927 }, { "clip_ratio/high_max": 0.0019109445747744758, "clip_ratio/high_mean": 0.0006054648492863635, "clip_ratio/low_mean": 0.0004211364412185503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010266013396176277, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 1008.6328735351562, "completions/mean_terminated_length": 535.7927856445312, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 8.671720116618076, "grad_norm": 0.25297918915748596, "learning_rate": 1e-06, "loss": -0.0326, "num_tokens": 520517500.0, "reward": 0.5926339626312256, "reward_std": 0.13929352164268494, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 928 }, { "clip_ratio/high_max": 0.0017755872031557374, "clip_ratio/high_mean": 0.000656154577882262, "clip_ratio/low_mean": 0.0004934472926834133, "clip_ratio/low_min": 1.6850903193699196e-05, "clip_ratio/region_mean": 0.0011496018632897176, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3596.0, "completions/mean_length": 926.3795166015625, "completions/mean_terminated_length": 541.5819702148438, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 8.681049562682215, "grad_norm": 0.28964167833328247, "learning_rate": 1e-06, "loss": -0.0405, "num_tokens": 521038920.0, "reward": 0.6328125, "reward_std": 0.16314727067947388, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 929 }, { "clip_ratio/high_max": 0.002014971847529523, "clip_ratio/high_mean": 0.0006176638726174133, "clip_ratio/low_mean": 0.00044335930124361767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010610231838654727, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2858.0, "completions/mean_length": 995.2109985351562, "completions/mean_terminated_length": 501.81243896484375, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 8.690379008746355, "grad_norm": 0.24830204248428345, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 521513133.0, "reward": 0.684151828289032, "reward_std": 0.11930189281702042, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 930 }, { "clip_ratio/high_max": 0.0019216491382394452, "clip_ratio/high_mean": 0.0007386029656117898, "clip_ratio/low_mean": 0.0005430868377516163, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012816898488381412, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 1096.6685791015625, "completions/mean_terminated_length": 605.8688354492188, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 8.699708454810496, "grad_norm": 0.2941707968711853, "learning_rate": 1e-06, "loss": -0.0534, "num_tokens": 522073092.0, "reward": 0.5948660969734192, "reward_std": 0.16905026137828827, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.491192102432251, "step": 931 }, { "clip_ratio/high_max": 0.001703739591903286, "clip_ratio/high_mean": 0.000559276007606968, "clip_ratio/low_mean": 0.0004476770636756555, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010069530726468656, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3916.0, "completions/mean_length": 907.87841796875, "completions/mean_terminated_length": 551.8845825195312, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 8.709037900874636, "grad_norm": 0.23361043632030487, "learning_rate": 1e-06, "loss": -0.0452, "num_tokens": 522611519.0, "reward": 0.6082589626312256, "reward_std": 0.1391758769750595, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.4884119927883148, "step": 932 }, { "clip_ratio/high_max": 0.0016184208434424363, "clip_ratio/high_mean": 0.0005095614214951638, "clip_ratio/low_mean": 0.0004677145607274724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009772759967745515, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 1032.6529541015625, "completions/mean_terminated_length": 568.0321044921875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.718367346938775, "grad_norm": 0.29861971735954285, "learning_rate": 1e-06, "loss": -0.0439, "num_tokens": 523142136.0, "reward": 0.6149553656578064, "reward_std": 0.12133137136697769, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 933 }, { "clip_ratio/high_max": 0.0016488937071699183, "clip_ratio/high_mean": 0.00060257361838012, "clip_ratio/low_mean": 0.0005295234968798468, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011320971370878397, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2773.0, "completions/mean_length": 1023.216552734375, "completions/mean_terminated_length": 561.7073364257812, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 8.727696793002915, "grad_norm": 0.29928725957870483, "learning_rate": 1e-06, "loss": -0.0473, "num_tokens": 523665874.0, "reward": 0.6049107313156128, "reward_std": 0.13042065501213074, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 934 }, { "clip_ratio/high_max": 0.002083321516693104, "clip_ratio/high_mean": 0.0008094834902294679, "clip_ratio/low_mean": 0.0005263916391413659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013358751093619503, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2732.0, "completions/mean_length": 977.9129638671875, "completions/mean_terminated_length": 537.011474609375, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 8.737026239067056, "grad_norm": 0.2581687569618225, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 524166892.0, "reward": 0.6517857313156128, "reward_std": 0.1559034138917923, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 935 }, { "clip_ratio/high_max": 0.0020345321245258674, "clip_ratio/high_mean": 0.0008072499858826632, "clip_ratio/low_mean": 0.0005050225477134518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013122725431458093, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3878.0, "completions/mean_length": 855.7857666015625, "completions/mean_terminated_length": 516.1824951171875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 8.746355685131196, "grad_norm": 0.6368719339370728, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 524673628.0, "reward": 0.6171875, "reward_std": 0.15942852199077606, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 936 }, { "clip_ratio/high_max": 0.0015030265221867012, "clip_ratio/high_mean": 0.0005101862302581139, "clip_ratio/low_mean": 0.00029400829180303845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008041945093282266, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3443.0, "completions/mean_length": 864.7701416015625, "completions/mean_terminated_length": 526.1085205078125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 8.755685131195335, "grad_norm": 0.17452889680862427, "learning_rate": 1e-06, "loss": -0.0331, "num_tokens": 525193518.0, "reward": 0.7020089626312256, "reward_std": 0.10333602875471115, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 937 }, { "clip_ratio/high_max": 0.0017282160697504878, "clip_ratio/high_mean": 0.0007196230753834243, "clip_ratio/low_mean": 0.000523139514370996, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012427625842974521, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2514.0, "completions/mean_length": 1067.56591796875, "completions/mean_terminated_length": 572.00390625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 8.765014577259475, "grad_norm": 0.2921406924724579, "learning_rate": 1e-06, "loss": -0.0736, "num_tokens": 525720633.0, "reward": 0.613839328289032, "reward_std": 0.16927596926689148, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 938 }, { "clip_ratio/high_max": 0.002149793963326374, "clip_ratio/high_mean": 0.0007308811664188397, "clip_ratio/low_mean": 0.00017927868805145408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009101598698180169, "completions/clipped_ratio": 0.0680803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 1987.0, "completions/mean_length": 713.154052734375, "completions/mean_terminated_length": 466.0239562988281, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 8.774344023323614, "grad_norm": 0.25510385632514954, "learning_rate": 1e-06, "loss": -0.0557, "num_tokens": 526193739.0, "reward": 0.738839328289032, "reward_std": 0.11866319924592972, "rewards/verify_math_reward/mean": 0.7388392686843872, "rewards/verify_math_reward/std": 0.439512699842453, "step": 939 }, { "clip_ratio/high_max": 0.002407035426585935, "clip_ratio/high_mean": 0.00088339581816399, "clip_ratio/low_mean": 0.0004453325391295948, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001328728350927122, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 1051.930908203125, "completions/mean_terminated_length": 535.3133544921875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 8.783673469387756, "grad_norm": 0.2580684721469879, "learning_rate": 1e-06, "loss": -0.0513, "num_tokens": 526697469.0, "reward": 0.625, "reward_std": 0.15030603110790253, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 940 }, { "clip_ratio/high_max": 0.0016847862389113288, "clip_ratio/high_mean": 0.0005231068853390752, "clip_ratio/low_mean": 0.00046479639240715187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009879032804747112, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3600.0, "completions/mean_length": 839.4877319335938, "completions/mean_terminated_length": 507.02703857421875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 8.793002915451895, "grad_norm": 0.275890052318573, "learning_rate": 1e-06, "loss": -0.0487, "num_tokens": 527188746.0, "reward": 0.6752232313156128, "reward_std": 0.14004167914390564, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 941 }, { "clip_ratio/high_max": 0.002201137878728332, "clip_ratio/high_mean": 0.0007997457887540804, "clip_ratio/low_mean": 0.00037658151813957375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011763273359974846, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3335.0, "completions/mean_length": 924.200927734375, "completions/mean_terminated_length": 512.2269897460938, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 8.802332361516035, "grad_norm": 0.2549758851528168, "learning_rate": 1e-06, "loss": -0.0494, "num_tokens": 527680038.0, "reward": 0.6417410969734192, "reward_std": 0.13260029256343842, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 942 }, { "clip_ratio/high_max": 0.0018307962527615018, "clip_ratio/high_mean": 0.0006916166012160829, "clip_ratio/low_mean": 0.0003792502388932917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010708668378356379, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3613.0, "completions/mean_length": 781.2344360351562, "completions/mean_terminated_length": 530.5377807617188, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 8.811661807580174, "grad_norm": 0.21261122822761536, "learning_rate": 1e-06, "loss": -0.0325, "num_tokens": 528212112.0, "reward": 0.6741071939468384, "reward_std": 0.126027911901474, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 943 }, { "clip_ratio/high_max": 0.0016578540744376369, "clip_ratio/high_mean": 0.0006712314716423862, "clip_ratio/low_mean": 0.0002848696722139721, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009561011211189907, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3646.0, "completions/mean_length": 962.4420166015625, "completions/mean_terminated_length": 564.3421020507812, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 8.820991253644316, "grad_norm": 0.2269987165927887, "learning_rate": 1e-06, "loss": -0.0505, "num_tokens": 528746356.0, "reward": 0.6350446939468384, "reward_std": 0.14681048691272736, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 944 }, { "clip_ratio/high_max": 0.002148511994164437, "clip_ratio/high_mean": 0.0007235466182464734, "clip_ratio/low_mean": 0.0003367772960700677, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010603239170450252, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2865.0, "completions/mean_length": 890.5558471679688, "completions/mean_terminated_length": 528.2012329101562, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 8.830320699708455, "grad_norm": 0.23371781408786774, "learning_rate": 1e-06, "loss": -0.0521, "num_tokens": 529255430.0, "reward": 0.6551339626312256, "reward_std": 0.13041996955871582, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900800228119, "step": 945 }, { "clip_ratio/high_max": 0.0021029542622272857, "clip_ratio/high_mean": 0.000874880613082496, "clip_ratio/low_mean": 0.0004403387274578563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013152193387213629, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3647.0, "completions/mean_length": 892.3114013671875, "completions/mean_terminated_length": 516.8167114257812, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 8.839650145772595, "grad_norm": 0.2372330278158188, "learning_rate": 1e-06, "loss": -0.082, "num_tokens": 529753661.0, "reward": 0.6696428656578064, "reward_std": 0.1595045030117035, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 946 }, { "clip_ratio/high_max": 0.001694159542239504, "clip_ratio/high_mean": 0.0005554514391405974, "clip_ratio/low_mean": 0.0002972530833176279, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008527045392838772, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3460.0, "completions/mean_length": 883.1272583007812, "completions/mean_terminated_length": 528.7955322265625, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 8.848979591836734, "grad_norm": 0.20902705192565918, "learning_rate": 1e-06, "loss": -0.0318, "num_tokens": 530262919.0, "reward": 0.640625, "reward_std": 0.11396483331918716, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 947 }, { "clip_ratio/high_max": 0.0019944397208746523, "clip_ratio/high_mean": 0.0007265213989740005, "clip_ratio/low_mean": 0.0003158008330501616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010423222338431515, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3685.0, "completions/mean_length": 911.2109985351562, "completions/mean_terminated_length": 590.384521484375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 8.858309037900874, "grad_norm": 0.22213862836360931, "learning_rate": 1e-06, "loss": -0.0397, "num_tokens": 530833612.0, "reward": 0.6674107313156128, "reward_std": 0.13639894127845764, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 948 }, { "clip_ratio/high_max": 0.0017291305521212053, "clip_ratio/high_mean": 0.0006730979021085659, "clip_ratio/low_mean": 0.00032835083197824133, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001001448712486308, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2863.0, "completions/mean_length": 921.8995971679688, "completions/mean_terminated_length": 532.0977172851562, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 8.867638483965015, "grad_norm": 0.21696864068508148, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 531344994.0, "reward": 0.6328125, "reward_std": 0.15138868987560272, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 949 }, { "clip_ratio/high_max": 0.001585699350471259, "clip_ratio/high_mean": 0.0006043091252649901, "clip_ratio/low_mean": 0.00048170495847443817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010860140719159972, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2396.0, "completions/mean_length": 1003.1495971679688, "completions/mean_terminated_length": 534.053955078125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 8.876967930029155, "grad_norm": 0.22879809141159058, "learning_rate": 1e-06, "loss": -0.0577, "num_tokens": 531850544.0, "reward": 0.6339285969734192, "reward_std": 0.13685175776481628, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199838399887085, "step": 950 }, { "clip_ratio/high_max": 0.002322256677871337, "clip_ratio/high_mean": 0.0008632607587060193, "clip_ratio/low_mean": 0.00039092526469630684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012541860232886393, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 930.66748046875, "completions/mean_terminated_length": 541.9423828125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 8.886297376093294, "grad_norm": 0.3646446466445923, "learning_rate": 1e-06, "loss": -0.0454, "num_tokens": 532377054.0, "reward": 0.6149553656578064, "reward_std": 0.14027062058448792, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 951 }, { "clip_ratio/high_max": 0.0020843351157964207, "clip_ratio/high_mean": 0.0008173993301170412, "clip_ratio/low_mean": 0.0003183621884090826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001135761533078039, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3512.0, "completions/mean_length": 880.3582763671875, "completions/mean_terminated_length": 552.0701293945312, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 8.895626822157434, "grad_norm": 0.25953951478004456, "learning_rate": 1e-06, "loss": -0.0388, "num_tokens": 532915935.0, "reward": 0.676339328289032, "reward_std": 0.14417551457881927, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335687637329, "step": 952 }, { "clip_ratio/high_max": 0.0023956390250532422, "clip_ratio/high_mean": 0.0009061027431016555, "clip_ratio/low_mean": 0.0004230161107443564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013291188261064235, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 931.810302734375, "completions/mean_terminated_length": 529.81884765625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 8.904956268221575, "grad_norm": 0.2815774381160736, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 533428317.0, "reward": 0.6417410969734192, "reward_std": 0.17223837971687317, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 953 }, { "clip_ratio/high_max": 0.0023849601711845025, "clip_ratio/high_mean": 0.0007930019182822434, "clip_ratio/low_mean": 0.00044139458168501733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001234396520885639, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2789.0, "completions/mean_length": 759.6663208007812, "completions/mean_terminated_length": 511.6415100097656, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 8.914285714285715, "grad_norm": 0.9295297265052795, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 533946258.0, "reward": 0.684151828289032, "reward_std": 0.12561675906181335, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 954 }, { "clip_ratio/high_max": 0.002032962493103696, "clip_ratio/high_mean": 0.000756587736759684, "clip_ratio/low_mean": 0.0005395033485910972, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001296091046242509, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 868.7332763671875, "completions/mean_terminated_length": 547.9865112304688, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 8.923615160349854, "grad_norm": 0.3900900185108185, "learning_rate": 1e-06, "loss": -0.0334, "num_tokens": 534476587.0, "reward": 0.6774553656578064, "reward_std": 0.15353691577911377, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 955 }, { "clip_ratio/high_max": 0.001982136134756729, "clip_ratio/high_mean": 0.0006743329922755947, "clip_ratio/low_mean": 0.0004361085129858111, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011104415207228158, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3267.0, "completions/mean_length": 947.0156860351562, "completions/mean_terminated_length": 546.9559936523438, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 8.932944606413994, "grad_norm": 0.2565886974334717, "learning_rate": 1e-06, "loss": -0.0439, "num_tokens": 535001649.0, "reward": 0.6261160969734192, "reward_std": 0.12456297874450684, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 956 }, { "clip_ratio/high_max": 0.0018949311051983386, "clip_ratio/high_mean": 0.0006435758623410948, "clip_ratio/low_mean": 0.00041794130288508313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010615171722747618, "completions/clipped_ratio": 0.1696428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3170.0, "completions/mean_length": 1151.618408203125, "completions/mean_terminated_length": 550.0779418945312, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 8.942274052478133, "grad_norm": 0.26293376088142395, "learning_rate": 1e-06, "loss": -0.0784, "num_tokens": 535502587.0, "reward": 0.543526828289032, "reward_std": 0.14815960824489594, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 957 }, { "clip_ratio/high_max": 0.0017872969001473393, "clip_ratio/high_mean": 0.0005414857291725639, "clip_ratio/low_mean": 0.000530541659827577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010720273967308458, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2626.0, "completions/mean_length": 986.763427734375, "completions/mean_terminated_length": 591.7534790039062, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 8.951603498542275, "grad_norm": 0.8553789258003235, "learning_rate": 1e-06, "loss": -0.0298, "num_tokens": 536068279.0, "reward": 0.6127232313156128, "reward_std": 0.13914379477500916, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 958 }, { "clip_ratio/high_max": 0.0019308765076857526, "clip_ratio/high_mean": 0.0006775250167265767, "clip_ratio/low_mean": 0.0005201590511205723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011976840578427073, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 1032.8671875, "completions/mean_terminated_length": 522.3450927734375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 8.960932944606414, "grad_norm": 0.24272368848323822, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 536560648.0, "reward": 0.6618303656578064, "reward_std": 0.13568215072155, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 959 }, { "clip_ratio/high_max": 0.0023686804706812836, "clip_ratio/high_mean": 0.0007480191306967754, "clip_ratio/low_mean": 0.0004042846069296502, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011523037137521897, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3794.0, "completions/mean_length": 1030.0546875, "completions/mean_terminated_length": 574.0936279296875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 8.970262390670554, "grad_norm": 0.8244277834892273, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 537105161.0, "reward": 0.6160714626312256, "reward_std": 0.13760243356227875, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 960 }, { "clip_ratio/high_max": 0.0020416714178281836, "clip_ratio/high_mean": 0.0008604426293459255, "clip_ratio/low_mean": 0.0004435464015841717, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013039890363870654, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3019.0, "completions/mean_length": 832.083740234375, "completions/mean_terminated_length": 529.5744018554688, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 8.979591836734693, "grad_norm": 0.27510568499565125, "learning_rate": 1e-06, "loss": -0.0473, "num_tokens": 537627972.0, "reward": 0.6930803656578064, "reward_std": 0.18118606507778168, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 961 }, { "clip_ratio/high_max": 0.0020786139648407698, "clip_ratio/high_mean": 0.0008178072894224897, "clip_ratio/low_mean": 0.0006200714051374234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001437878621800337, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3749.0, "completions/mean_length": 892.8850708007812, "completions/mean_terminated_length": 548.4215087890625, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 8.988921282798835, "grad_norm": 0.32806262373924255, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 538168917.0, "reward": 0.6417410969734192, "reward_std": 0.1789316087961197, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 962 }, { "clip_ratio/high_max": 0.0019751785475818906, "clip_ratio/high_mean": 0.0007488408955396153, "clip_ratio/low_mean": 0.0004993245383957401, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012481654557632282, "completions/clipped_ratio": 0.09943181818181823, "completions/max_length": 4096.0, "completions/max_terminated_length": 1380.0, "completions/mean_length": 922.9005737304688, "completions/mean_terminated_length": 572.558349609375, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 8.998250728862974, "grad_norm": 0.25778788328170776, "learning_rate": 1e-06, "loss": -0.0481, "num_tokens": 538719050.0, "reward": 0.5848214626312256, "reward_std": 0.1367429792881012, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 963 }, { "clip_ratio/high_max": 0.0020093115599593148, "clip_ratio/high_mean": 0.0008440847905148985, "clip_ratio/low_mean": 0.00047851414274191484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001322598931437824, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 919.58935546875, "completions/mean_terminated_length": 599.6068725585938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.00932944606414, "grad_norm": 0.203589528799057, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 539299338.0, "reward": 0.6662946939468384, "reward_std": 0.168679341673851, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 964 }, { "clip_ratio/high_max": 0.0018228169428766705, "clip_ratio/high_mean": 0.0006125026775407605, "clip_ratio/low_mean": 0.00035421171560301445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009667143822298385, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3509.0, "completions/mean_length": 863.7723388671875, "completions/mean_terminated_length": 493.9154052734375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 9.018658892128279, "grad_norm": 0.23763151466846466, "learning_rate": 1e-06, "loss": -0.0405, "num_tokens": 539782430.0, "reward": 0.6897321939468384, "reward_std": 0.1356828510761261, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.462861567735672, "step": 965 }, { "clip_ratio/high_max": 0.0022057854148442857, "clip_ratio/high_mean": 0.0008197773859137669, "clip_ratio/low_mean": 0.0005298856394801987, "clip_ratio/low_min": 1.5424482626258396e-05, "clip_ratio/region_mean": 0.0013496630308509339, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4044.0, "completions/mean_length": 811.2801513671875, "completions/mean_terminated_length": 524.2658081054688, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 9.02798833819242, "grad_norm": 0.2678503096103668, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 540311097.0, "reward": 0.6640625, "reward_std": 0.16074052453041077, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 966 }, { "clip_ratio/high_max": 0.0021756058376922738, "clip_ratio/high_mean": 0.0007414086248900276, "clip_ratio/low_mean": 0.0005067757974757114, "clip_ratio/low_min": 1.4751002709090244e-05, "clip_ratio/region_mean": 0.0012481844096328132, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 1880.0, "completions/mean_length": 967.0558471679688, "completions/mean_terminated_length": 520.0637817382812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 9.03731778425656, "grad_norm": 2.595151424407959, "learning_rate": 1e-06, "loss": -0.0549, "num_tokens": 540809227.0, "reward": 0.6339285969734192, "reward_std": 0.14368806779384613, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199838399887085, "step": 967 }, { "clip_ratio/high_max": 0.0016082822221505921, "clip_ratio/high_mean": 0.0006099893516875454, "clip_ratio/low_mean": 0.00024320421096035716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008531935691280523, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3626.0, "completions/mean_length": 966.1942138671875, "completions/mean_terminated_length": 573.0025024414062, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.0466472303207, "grad_norm": 0.1958475559949875, "learning_rate": 1e-06, "loss": -0.0471, "num_tokens": 541371185.0, "reward": 0.609375, "reward_std": 0.12587566673755646, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 968 }, { "clip_ratio/high_max": 0.0021028903356636874, "clip_ratio/high_mean": 0.0007086892273946432, "clip_ratio/low_mean": 0.0004200470975774806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011287362885923358, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 828.036865234375, "completions/mean_terminated_length": 538.1689453125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.055976676384839, "grad_norm": 0.20024479925632477, "learning_rate": 1e-06, "loss": -0.0505, "num_tokens": 541898738.0, "reward": 0.6897321939468384, "reward_std": 0.12092021852731705, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.462861567735672, "step": 969 }, { "clip_ratio/high_max": 0.002064900614641374, "clip_ratio/high_mean": 0.0007238188782139332, "clip_ratio/low_mean": 0.00025628001299082825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009800989018913242, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2521.0, "completions/mean_length": 778.5904541015625, "completions/mean_terminated_length": 488.71966552734375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 9.06530612244898, "grad_norm": 0.26544249057769775, "learning_rate": 1e-06, "loss": -0.0321, "num_tokens": 542385971.0, "reward": 0.7064732313156128, "reward_std": 0.12343642115592957, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 970 }, { "clip_ratio/high_max": 0.0017448019916628255, "clip_ratio/high_mean": 0.0006562436042258923, "clip_ratio/low_mean": 0.0005531991464522434, "clip_ratio/low_min": 1.3001872503082268e-05, "clip_ratio/region_mean": 0.0012094427802367136, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3242.0, "completions/mean_length": 965.7210083007812, "completions/mean_terminated_length": 568.0377197265625, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 9.07463556851312, "grad_norm": 0.21854481101036072, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 542929969.0, "reward": 0.621651828289032, "reward_std": 0.14556489884853363, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.485245943069458, "step": 971 }, { "clip_ratio/high_max": 0.0016820623168314341, "clip_ratio/high_mean": 0.00045042975625619874, "clip_ratio/low_mean": 0.0004390708999153503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008895006576494779, "completions/clipped_ratio": 0.0770089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3163.0, "completions/mean_length": 796.8638916015625, "completions/mean_terminated_length": 521.6033935546875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 9.08396501457726, "grad_norm": 0.3739985525608063, "learning_rate": 1e-06, "loss": -0.0221, "num_tokens": 543444255.0, "reward": 0.6964285969734192, "reward_std": 0.12035498023033142, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 972 }, { "clip_ratio/high_max": 0.0017770100057532545, "clip_ratio/high_mean": 0.0006339427545754006, "clip_ratio/low_mean": 0.00034976727511093486, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000983710040600272, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2004.0, "completions/mean_length": 835.1116333007812, "completions/mean_terminated_length": 554.4776000976562, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 9.093294460641399, "grad_norm": 0.202556312084198, "learning_rate": 1e-06, "loss": -0.0398, "num_tokens": 543989723.0, "reward": 0.6886160969734192, "reward_std": 0.1393256038427353, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 973 }, { "clip_ratio/high_max": 0.0018257257870573085, "clip_ratio/high_mean": 0.0004840030205741641, "clip_ratio/low_mean": 0.0004987927691217919, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009827957801462617, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3883.0, "completions/mean_length": 1002.9141235351562, "completions/mean_terminated_length": 592.3274536132812, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 9.102623906705539, "grad_norm": 0.22389450669288635, "learning_rate": 1e-06, "loss": -0.0374, "num_tokens": 544538366.0, "reward": 0.5926339626312256, "reward_std": 0.12933549284934998, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 974 }, { "clip_ratio/high_max": 0.002657514691236429, "clip_ratio/high_mean": 0.0009615100443625124, "clip_ratio/low_mean": 0.0006056770866962324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015671870933147147, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3349.0, "completions/mean_length": 1071.8226318359375, "completions/mean_terminated_length": 530.6539916992188, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 9.11195335276968, "grad_norm": 0.4175319969654083, "learning_rate": 1e-06, "loss": -0.0895, "num_tokens": 545047175.0, "reward": 0.5837053656578064, "reward_std": 0.16506798565387726, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 975 }, { "clip_ratio/high_max": 0.0018570687279861886, "clip_ratio/high_mean": 0.0006237320376385469, "clip_ratio/low_mean": 0.0005383524844546628, "clip_ratio/low_min": 1.752172647684347e-05, "clip_ratio/region_mean": 0.0011620845216384623, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2910.0, "completions/mean_length": 983.0223388671875, "completions/mean_terminated_length": 515.4762573242188, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 9.12128279883382, "grad_norm": 0.2701054811477661, "learning_rate": 1e-06, "loss": -0.0562, "num_tokens": 545540907.0, "reward": 0.6116071939468384, "reward_std": 0.1429734081029892, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 976 }, { "clip_ratio/high_max": 0.00210203471215209, "clip_ratio/high_mean": 0.0008136369706335245, "clip_ratio/low_mean": 0.00039900924821267836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012126462279411498, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3797.0, "completions/mean_length": 1006.4230346679688, "completions/mean_terminated_length": 587.430908203125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 9.130612244897959, "grad_norm": 0.2305305004119873, "learning_rate": 1e-06, "loss": -0.0651, "num_tokens": 546097342.0, "reward": 0.629464328289032, "reward_std": 0.16825930774211884, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 977 }, { "clip_ratio/high_max": 0.002026208430834231, "clip_ratio/high_mean": 0.0006495902581491464, "clip_ratio/low_mean": 0.00031128956925385864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009608798245608341, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3378.0, "completions/mean_length": 1021.5558471679688, "completions/mean_terminated_length": 518.4649047851562, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.139941690962099, "grad_norm": 0.2514380216598511, "learning_rate": 1e-06, "loss": -0.0529, "num_tokens": 546585328.0, "reward": 0.6383928656578064, "reward_std": 0.12674400210380554, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 978 }, { "clip_ratio/high_max": 0.0018954524566652253, "clip_ratio/high_mean": 0.0006922929460415617, "clip_ratio/low_mean": 0.00033622576484049205, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010285187290719477, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2062.0, "completions/mean_length": 907.7489013671875, "completions/mean_terminated_length": 538.4993896484375, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 9.14927113702624, "grad_norm": 0.21578700840473175, "learning_rate": 1e-06, "loss": -0.049, "num_tokens": 547112535.0, "reward": 0.6573660969734192, "reward_std": 0.13467325270175934, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 979 }, { "clip_ratio/high_max": 0.0021745014855696354, "clip_ratio/high_mean": 0.0008442952084806166, "clip_ratio/low_mean": 0.0003191076943949156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011634029251581524, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3657.0, "completions/mean_length": 1070.578125, "completions/mean_terminated_length": 552.4993896484375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.15860058309038, "grad_norm": 0.23006924986839294, "learning_rate": 1e-06, "loss": -0.0701, "num_tokens": 547626397.0, "reward": 0.637276828289032, "reward_std": 0.16262802481651306, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 980 }, { "clip_ratio/high_max": 0.0018160391045967117, "clip_ratio/high_mean": 0.0006584514458154445, "clip_ratio/low_mean": 0.00042727838717837585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001085729827536852, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 803.1886596679688, "completions/mean_terminated_length": 519.8072509765625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 9.167930029154519, "grad_norm": 0.2168523222208023, "learning_rate": 1e-06, "loss": -0.0394, "num_tokens": 548152662.0, "reward": 0.6439732313156128, "reward_std": 0.1546546071767807, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909069061279297, "step": 981 }, { "clip_ratio/high_max": 0.0017473156367486808, "clip_ratio/high_mean": 0.0006193101580720395, "clip_ratio/low_mean": 0.0003872137795042363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010065239112009294, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2810.0, "completions/mean_length": 834.4955444335938, "completions/mean_terminated_length": 505.9410095214844, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 9.177259475218658, "grad_norm": 0.6085642576217651, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 548643954.0, "reward": 0.6272321939468384, "reward_std": 0.14199379086494446, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 982 }, { "clip_ratio/high_max": 0.0018346663418924436, "clip_ratio/high_mean": 0.0006980431280680932, "clip_ratio/low_mean": 0.00047877328279355424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011768164113163948, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3568.0, "completions/mean_length": 862.716552734375, "completions/mean_terminated_length": 515.0086669921875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 9.186588921282798, "grad_norm": 0.2697436213493347, "learning_rate": 1e-06, "loss": -0.0712, "num_tokens": 549145948.0, "reward": 0.7109375596046448, "reward_std": 0.13947898149490356, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 983 }, { "clip_ratio/high_max": 0.0017626728586037643, "clip_ratio/high_mean": 0.00068703516444657, "clip_ratio/low_mean": 0.00044360163337842096, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011306368105579168, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3812.0, "completions/mean_length": 919.1451416015625, "completions/mean_terminated_length": 564.409423828125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 9.19591836734694, "grad_norm": 0.2808445692062378, "learning_rate": 1e-06, "loss": -0.0214, "num_tokens": 549690614.0, "reward": 0.6328125, "reward_std": 0.1382436603307724, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 984 }, { "clip_ratio/high_max": 0.0019384068218641914, "clip_ratio/high_mean": 0.0005926047469984042, "clip_ratio/low_mean": 0.0002483020130057412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000840906759549398, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2166.0, "completions/mean_length": 776.4230346679688, "completions/mean_terminated_length": 508.1339111328125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 9.205247813411079, "grad_norm": 0.18402858078479767, "learning_rate": 1e-06, "loss": -0.0342, "num_tokens": 550183529.0, "reward": 0.7243303656578064, "reward_std": 0.10742456465959549, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 985 }, { "clip_ratio/high_max": 0.001789521276805317, "clip_ratio/high_mean": 0.0006327138380584074, "clip_ratio/low_mean": 0.00034425806279614335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000976971881755162, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3967.0, "completions/mean_length": 803.5491333007812, "completions/mean_terminated_length": 511.5091247558594, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 9.214577259475218, "grad_norm": 0.829149603843689, "learning_rate": 1e-06, "loss": -0.0495, "num_tokens": 550690085.0, "reward": 0.7243303656578064, "reward_std": 0.11404222995042801, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 986 }, { "clip_ratio/high_max": 0.001902834716020152, "clip_ratio/high_mean": 0.0006656191744696116, "clip_ratio/low_mean": 0.00035124982059642207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010168690223508747, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3194.0, "completions/mean_length": 1050.0960693359375, "completions/mean_terminated_length": 565.4307861328125, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 9.223906705539358, "grad_norm": 0.2216397374868393, "learning_rate": 1e-06, "loss": -0.0686, "num_tokens": 551226619.0, "reward": 0.5926339626312256, "reward_std": 0.13466298580169678, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161848425865173, "step": 987 }, { "clip_ratio/high_max": 0.0016880720722838305, "clip_ratio/high_mean": 0.0006492141983471811, "clip_ratio/low_mean": 0.000368816930858884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010180311364820227, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3736.0, "completions/mean_length": 912.03466796875, "completions/mean_terminated_length": 560.8909912109375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.2332361516035, "grad_norm": 0.2358027994632721, "learning_rate": 1e-06, "loss": -0.0459, "num_tokens": 551779138.0, "reward": 0.6361607313156128, "reward_std": 0.14774592220783234, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 988 }, { "clip_ratio/high_max": 0.0021234851628832985, "clip_ratio/high_mean": 0.0007095335295161931, "clip_ratio/low_mean": 0.0005461098162413691, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012556433612189721, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2474.0, "completions/mean_length": 950.5145263671875, "completions/mean_terminated_length": 523.9404296875, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 9.242565597667639, "grad_norm": 0.27929025888442993, "learning_rate": 1e-06, "loss": -0.0596, "num_tokens": 552276159.0, "reward": 0.668526828289032, "reward_std": 0.1533079892396927, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 989 }, { "clip_ratio/high_max": 0.002024388057179749, "clip_ratio/high_mean": 0.0007182806275523035, "clip_ratio/low_mean": 0.00048276128927682294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012010418940917589, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2961.0, "completions/mean_length": 915.060302734375, "completions/mean_terminated_length": 510.9408874511719, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 9.251895043731778, "grad_norm": 0.2631526589393616, "learning_rate": 1e-06, "loss": -0.0418, "num_tokens": 552779741.0, "reward": 0.6339285969734192, "reward_std": 0.14143106341362, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 990 }, { "clip_ratio/high_max": 0.0014596193941542879, "clip_ratio/high_mean": 0.0005140056018717587, "clip_ratio/low_mean": 0.0005408700999396387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010548756927164504, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2163.0, "completions/mean_length": 839.9085083007812, "completions/mean_terminated_length": 525.0599975585938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 9.261224489795918, "grad_norm": 0.242902934551239, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 553310843.0, "reward": 0.6328125, "reward_std": 0.12192870676517487, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 991 }, { "clip_ratio/high_max": 0.0018979092492372729, "clip_ratio/high_mean": 0.0007676648638152983, "clip_ratio/low_mean": 0.000459608839264547, "clip_ratio/low_min": 1.3619524906971492e-05, "clip_ratio/region_mean": 0.0012272737149032764, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3209.0, "completions/mean_length": 953.8594360351562, "completions/mean_terminated_length": 550.2090454101562, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 9.270553935860057, "grad_norm": 0.2421979159116745, "learning_rate": 1e-06, "loss": -0.0724, "num_tokens": 553834045.0, "reward": 0.6104910969734192, "reward_std": 0.15924307703971863, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791125416755676, "step": 992 }, { "clip_ratio/high_max": 0.0024087222845992073, "clip_ratio/high_mean": 0.0008905131344363326, "clip_ratio/low_mean": 0.0002782431856758194, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011687562946463004, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3689.0, "completions/mean_length": 875.357177734375, "completions/mean_terminated_length": 555.2687377929688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 9.279883381924199, "grad_norm": 31.97504997253418, "learning_rate": 1e-06, "loss": -0.0541, "num_tokens": 554385797.0, "reward": 0.6975446939468384, "reward_std": 0.146658256649971, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 993 }, { "clip_ratio/high_max": 0.0022565197723452, "clip_ratio/high_mean": 0.0007862259280955186, "clip_ratio/low_mean": 0.0004552455166049185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012414714219630696, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3297.0, "completions/mean_length": 1000.0435791015625, "completions/mean_terminated_length": 535.053955078125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 9.289212827988338, "grad_norm": 0.26120316982269287, "learning_rate": 1e-06, "loss": -0.0539, "num_tokens": 554885780.0, "reward": 0.6383928656578064, "reward_std": 0.13906781375408173, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 994 }, { "clip_ratio/high_max": 0.0017992375142057426, "clip_ratio/high_mean": 0.0006830044858361362, "clip_ratio/low_mean": 0.00041874111070683284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011017456163244788, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3475.0, "completions/mean_length": 850.0870971679688, "completions/mean_terminated_length": 553.56640625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 9.298542274052478, "grad_norm": 0.23041197657585144, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 555432802.0, "reward": 0.6595982313156128, "reward_std": 0.14462240040302277, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 995 }, { "clip_ratio/high_max": 0.0021802876217407174, "clip_ratio/high_mean": 0.0007658100566914072, "clip_ratio/low_mean": 0.0003805052924690244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001146315331425285, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 943.3114013671875, "completions/mean_terminated_length": 524.8129272460938, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 9.307871720116617, "grad_norm": 0.2721904218196869, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 555940369.0, "reward": 0.660714328289032, "reward_std": 0.14121240377426147, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 996 }, { "clip_ratio/high_max": 0.0017383672566211317, "clip_ratio/high_mean": 0.0005729928307118826, "clip_ratio/low_mean": 0.0005022300210839603, "clip_ratio/low_min": 1.7071837646653876e-05, "clip_ratio/region_mean": 0.001075222869985737, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2191.0, "completions/mean_length": 825.552490234375, "completions/mean_terminated_length": 509.3157958984375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 9.317201166180759, "grad_norm": 0.23203667998313904, "learning_rate": 1e-06, "loss": -0.0468, "num_tokens": 556443808.0, "reward": 0.6796875596046448, "reward_std": 0.1382322758436203, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 997 }, { "clip_ratio/high_max": 0.0016585760640737135, "clip_ratio/high_mean": 0.0005702862890757388, "clip_ratio/low_mean": 0.0004152120200160425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009854983291006647, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3359.0, "completions/mean_length": 869.4163208007812, "completions/mean_terminated_length": 526.8407592773438, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 9.326530612244898, "grad_norm": 0.23990260064601898, "learning_rate": 1e-06, "loss": -0.025, "num_tokens": 556948261.0, "reward": 0.6618303656578064, "reward_std": 0.134141206741333, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 998 }, { "clip_ratio/high_max": 0.001819459313992411, "clip_ratio/high_mean": 0.0006607770028494997, "clip_ratio/low_mean": 0.0005408785964391427, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012016556102025788, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2962.0, "completions/mean_length": 830.966552734375, "completions/mean_terminated_length": 457.355712890625, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 9.335860058309038, "grad_norm": 0.26270946860313416, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 557407351.0, "reward": 0.6808035969734192, "reward_std": 0.1371905654668808, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.46642565727233887, "step": 999 }, { "clip_ratio/high_max": 0.0014994931734690908, "clip_ratio/high_mean": 0.0004761727250297554, "clip_ratio/low_mean": 0.00027690992033058137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007530826533184154, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3933.0, "completions/mean_length": 851.708740234375, "completions/mean_terminated_length": 529.2699584960938, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 9.345189504373177, "grad_norm": 0.19529157876968384, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 557933426.0, "reward": 0.6674107313156128, "reward_std": 0.10701198875904083, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1000 }, { "clip_ratio/high_max": 0.0014798677548242267, "clip_ratio/high_mean": 0.0005599712112598354, "clip_ratio/low_mean": 0.00042298395828765933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009829551727307262, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3759.0, "completions/mean_length": 1040.9967041015625, "completions/mean_terminated_length": 503.76507568359375, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 9.354518950437317, "grad_norm": 0.24427780508995056, "learning_rate": 1e-06, "loss": -0.05, "num_tokens": 558411143.0, "reward": 0.6316964626312256, "reward_std": 0.1380895972251892, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1001 }, { "clip_ratio/high_max": 0.0016418406812590547, "clip_ratio/high_mean": 0.0005652182262565475, "clip_ratio/low_mean": 0.0003534897705321782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009187079886032734, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 938.0848388671875, "completions/mean_terminated_length": 509.8251037597656, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 9.363848396501458, "grad_norm": 0.2545948624610901, "learning_rate": 1e-06, "loss": -0.0307, "num_tokens": 558896099.0, "reward": 0.6473214626312256, "reward_std": 0.11565801501274109, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1002 }, { "clip_ratio/high_max": 0.0018756211575237103, "clip_ratio/high_mean": 0.0006141574685898377, "clip_ratio/low_mean": 0.00042348662009317195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010376441168773454, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2327.0, "completions/mean_length": 935.9129638671875, "completions/mean_terminated_length": 534.4427490234375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 9.373177842565598, "grad_norm": 0.20861415565013885, "learning_rate": 1e-06, "loss": -0.0588, "num_tokens": 559408669.0, "reward": 0.6417410969734192, "reward_std": 0.1255386769771576, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 1003 }, { "clip_ratio/high_max": 0.0017241709101654124, "clip_ratio/high_mean": 0.0007156978317652829, "clip_ratio/low_mean": 0.00039253456225196715, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011082323671871563, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3613.0, "completions/mean_length": 983.443115234375, "completions/mean_terminated_length": 538.7920532226562, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 9.382507288629737, "grad_norm": 0.2265206128358841, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 559922538.0, "reward": 0.6484375, "reward_std": 0.1542109102010727, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 1004 }, { "clip_ratio/high_max": 0.0017527268464618828, "clip_ratio/high_mean": 0.0005118758144817548, "clip_ratio/low_mean": 0.0004175999210929149, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009294757219322491, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 953.44873046875, "completions/mean_terminated_length": 549.7455444335938, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 9.391836734693877, "grad_norm": 0.30438947677612305, "learning_rate": 1e-06, "loss": -0.031, "num_tokens": 560444068.0, "reward": 0.6517857313156128, "reward_std": 0.110169418156147, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1005 }, { "clip_ratio/high_max": 0.0017773605286492966, "clip_ratio/high_mean": 0.0005669130550813861, "clip_ratio/low_mean": 0.00047973556957003893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010466486328368774, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 947.6473388671875, "completions/mean_terminated_length": 569.844970703125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 9.401166180758018, "grad_norm": 0.2808428108692169, "learning_rate": 1e-06, "loss": -0.0394, "num_tokens": 560988552.0, "reward": 0.6283482313156128, "reward_std": 0.14373114705085754, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 1006 }, { "clip_ratio/high_max": 0.0017154052256955765, "clip_ratio/high_mean": 0.0005592965089817881, "clip_ratio/low_mean": 0.00033373002133885166, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008930265339586185, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3733.0, "completions/mean_length": 1008.8761596679688, "completions/mean_terminated_length": 517.6520385742188, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 9.410495626822158, "grad_norm": 0.28805091977119446, "learning_rate": 1e-06, "loss": -0.0717, "num_tokens": 561477145.0, "reward": 0.6227678656578064, "reward_std": 0.11967099457979202, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 1007 }, { "clip_ratio/high_max": 0.002020704618189484, "clip_ratio/high_mean": 0.0008039077965804609, "clip_ratio/low_mean": 0.00048153305897358223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012854408487328328, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3521.0, "completions/mean_length": 982.1920166015625, "completions/mean_terminated_length": 546.4172973632812, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 9.419825072886297, "grad_norm": 0.31485286355018616, "learning_rate": 1e-06, "loss": -0.0505, "num_tokens": 561995213.0, "reward": 0.637276828289032, "reward_std": 0.1576700508594513, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1008 }, { "clip_ratio/high_max": 0.0016207741209655069, "clip_ratio/high_mean": 0.0005247285771474708, "clip_ratio/low_mean": 0.0003002406610903563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008249692455137847, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3270.0, "completions/mean_length": 974.9063110351562, "completions/mean_terminated_length": 560.601806640625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 9.429154518950437, "grad_norm": 0.6327840089797974, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 562526481.0, "reward": 0.6316964626312256, "reward_std": 0.11794712394475937, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1009 }, { "clip_ratio/high_max": 0.002112652495270595, "clip_ratio/high_mean": 0.0007039539596007671, "clip_ratio/low_mean": 0.00043457348101583193, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011385274337953888, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 907.7131958007812, "completions/mean_terminated_length": 493.59771728515625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 9.438483965014576, "grad_norm": 0.25927993655204773, "learning_rate": 1e-06, "loss": -0.0439, "num_tokens": 563015184.0, "reward": 0.6495535969734192, "reward_std": 0.13511762022972107, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1010 }, { "clip_ratio/high_max": 0.0019028616297873668, "clip_ratio/high_mean": 0.0007404392126773018, "clip_ratio/low_mean": 0.0003135628901418386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010540021012275247, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2398.0, "completions/mean_length": 920.3750610351562, "completions/mean_terminated_length": 516.9307861328125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 9.447813411078718, "grad_norm": 0.36331212520599365, "learning_rate": 1e-06, "loss": -0.0485, "num_tokens": 563516632.0, "reward": 0.7120535969734192, "reward_std": 0.1408672332763672, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530589282512665, "step": 1011 }, { "clip_ratio/high_max": 0.0024156742329068948, "clip_ratio/high_mean": 0.0006409119605450542, "clip_ratio/low_mean": 0.00046294131243485026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001103853246604558, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3469.0, "completions/mean_length": 935.9553833007812, "completions/mean_terminated_length": 493.7099304199219, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 9.457142857142857, "grad_norm": 0.21210302412509918, "learning_rate": 1e-06, "loss": -0.0552, "num_tokens": 563994424.0, "reward": 0.6875000596046448, "reward_std": 0.11306330561637878, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1012 }, { "clip_ratio/high_max": 0.0017442624230170622, "clip_ratio/high_mean": 0.0006475191639765399, "clip_ratio/low_mean": 0.0003780283554988273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010255475135636516, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3601.0, "completions/mean_length": 867.5848388671875, "completions/mean_terminated_length": 542.3636474609375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.466472303206997, "grad_norm": 0.23791077733039856, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 564542748.0, "reward": 0.6026785969734192, "reward_std": 0.1381983608007431, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1013 }, { "clip_ratio/high_max": 0.0013634544957312755, "clip_ratio/high_mean": 0.0004106922224309528, "clip_ratio/low_mean": 0.0003951218922111366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008058141083893133, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 962.7332763671875, "completions/mean_terminated_length": 582.3491821289062, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 9.475801749271136, "grad_norm": 0.2896384596824646, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 565091525.0, "reward": 0.637276828289032, "reward_std": 0.1061122789978981, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1014 }, { "clip_ratio/high_max": 0.0016935610274231294, "clip_ratio/high_mean": 0.0006141749763628468, "clip_ratio/low_mean": 0.0005587855757767102, "clip_ratio/low_min": 2.942099490610417e-05, "clip_ratio/region_mean": 0.0011729605448635994, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3707.0, "completions/mean_length": 1130.489990234375, "completions/mean_terminated_length": 590.5950317382812, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 9.485131195335278, "grad_norm": 0.22907555103302002, "learning_rate": 1e-06, "loss": -0.0671, "num_tokens": 565629284.0, "reward": 0.5267857313156128, "reward_std": 0.16352775692939758, "rewards/verify_math_reward/mean": 0.5267857313156128, "rewards/verify_math_reward/std": 0.4995608627796173, "step": 1015 }, { "clip_ratio/high_max": 0.001559922326123342, "clip_ratio/high_mean": 0.0005506666593646514, "clip_ratio/low_mean": 0.0003646592513177893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009153259288723348, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3843.0, "completions/mean_length": 807.7064819335938, "completions/mean_terminated_length": 507.31427001953125, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 9.494460641399417, "grad_norm": 0.2359188050031662, "learning_rate": 1e-06, "loss": -0.0217, "num_tokens": 566125861.0, "reward": 0.6941964626312256, "reward_std": 0.1270803064107895, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1016 }, { "clip_ratio/high_max": 0.001428876810678048, "clip_ratio/high_mean": 0.0005704977638743003, "clip_ratio/low_mean": 0.00041258256078435807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009830803282966372, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1877.0, "completions/mean_length": 947.3482666015625, "completions/mean_terminated_length": 524.8709106445312, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.503790087463557, "grad_norm": 0.26440975069999695, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 566622821.0, "reward": 0.6741071939468384, "reward_std": 0.14056415855884552, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1017 }, { "clip_ratio/high_max": 0.0027742850725189783, "clip_ratio/high_mean": 0.001015650592307793, "clip_ratio/low_mean": 0.0005431211029645056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001558771666168468, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3354.0, "completions/mean_length": 922.216552734375, "completions/mean_terminated_length": 541.3624877929688, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 9.513119533527696, "grad_norm": 0.31896474957466125, "learning_rate": 1e-06, "loss": -0.0804, "num_tokens": 567149303.0, "reward": 0.6808035969734192, "reward_std": 0.19448629021644592, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 1018 }, { "clip_ratio/high_max": 0.002351549257582519, "clip_ratio/high_mean": 0.0008312666177516803, "clip_ratio/low_mean": 0.00045399160035231034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012852582221967168, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3881.0, "completions/mean_length": 924.6942138671875, "completions/mean_terminated_length": 535.235595703125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 9.522448979591836, "grad_norm": 0.33340534567832947, "learning_rate": 1e-06, "loss": -0.0641, "num_tokens": 567657621.0, "reward": 0.6941964626312256, "reward_std": 0.14458851516246796, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1019 }, { "clip_ratio/high_max": 0.0013591579408966936, "clip_ratio/high_mean": 0.00044431955302570714, "clip_ratio/low_mean": 0.0003737329818704893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008180525419447804, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3327.0, "completions/mean_length": 1015.2891235351562, "completions/mean_terminated_length": 557.132080078125, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 9.531778425655977, "grad_norm": 0.24508723616600037, "learning_rate": 1e-06, "loss": -0.0641, "num_tokens": 568177032.0, "reward": 0.6506696939468384, "reward_std": 0.134664386510849, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1020 }, { "clip_ratio/high_max": 0.0019166687125107273, "clip_ratio/high_mean": 0.0006429423883673735, "clip_ratio/low_mean": 0.00033448019394199946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009774225909495726, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 914.7913208007812, "completions/mean_terminated_length": 515.1419677734375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 9.541107871720117, "grad_norm": 0.17712129652500153, "learning_rate": 1e-06, "loss": -0.0671, "num_tokens": 568674877.0, "reward": 0.6473214626312256, "reward_std": 0.12050722539424896, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1021 }, { "clip_ratio/high_max": 0.0019794206964434125, "clip_ratio/high_mean": 0.0005501120795088354, "clip_ratio/low_mean": 0.0003695714331115596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009196835089824162, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3052.0, "completions/mean_length": 955.3906860351562, "completions/mean_terminated_length": 529.4778442382812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 9.550437317784256, "grad_norm": 0.23466232419013977, "learning_rate": 1e-06, "loss": -0.07, "num_tokens": 569180787.0, "reward": 0.6796875596046448, "reward_std": 0.11937858909368515, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1022 }, { "clip_ratio/high_max": 0.001762232495821081, "clip_ratio/high_mean": 0.0005917552807659376, "clip_ratio/low_mean": 0.0004402920067150262, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010320473120373208, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2850.0, "completions/mean_length": 1067.9320068359375, "completions/mean_terminated_length": 613.138671875, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 9.559766763848396, "grad_norm": 19.286802291870117, "learning_rate": 1e-06, "loss": -0.066, "num_tokens": 569759566.0, "reward": 0.5714285969734192, "reward_std": 0.14981746673583984, "rewards/verify_math_reward/mean": 0.5714285969734192, "rewards/verify_math_reward/std": 0.49514803290367126, "step": 1023 }, { "clip_ratio/high_max": 0.0021669982379535213, "clip_ratio/high_mean": 0.0006441391160478815, "clip_ratio/low_mean": 0.0005867732265869563, "clip_ratio/low_min": 1.822157355491072e-05, "clip_ratio/region_mean": 0.0012309123158047441, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3607.0, "completions/mean_length": 841.3928833007812, "completions/mean_terminated_length": 535.4041748046875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 9.569096209912537, "grad_norm": 0.2259806990623474, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 570297430.0, "reward": 0.6506696939468384, "reward_std": 0.13339193165302277, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1024 }, { "clip_ratio/high_max": 0.0018299978873983491, "clip_ratio/high_mean": 0.0007017430471023545, "clip_ratio/low_mean": 0.0004816478376596933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011833908647531644, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2896.0, "completions/mean_length": 965.9063110351562, "completions/mean_terminated_length": 523.3070068359375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 9.578425655976677, "grad_norm": 0.3631192147731781, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 570791330.0, "reward": 0.668526828289032, "reward_std": 0.14312496781349182, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 1025 }, { "clip_ratio/high_max": 0.0014764480984013062, "clip_ratio/high_mean": 0.0005153620877536014, "clip_ratio/low_mean": 0.00024874671225916245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007641087759111542, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2611.0, "completions/mean_length": 915.0256958007812, "completions/mean_terminated_length": 483.6387939453125, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 9.587755102040816, "grad_norm": 0.18286962807178497, "learning_rate": 1e-06, "loss": -0.0731, "num_tokens": 571270201.0, "reward": 0.6584821939468384, "reward_std": 0.09014318883419037, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1026 }, { "clip_ratio/high_max": 0.0013748594756179955, "clip_ratio/high_mean": 0.0004334319114605023, "clip_ratio/low_mean": 0.0002134787928298465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006469107211160008, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1857.0, "completions/mean_length": 798.8560791015625, "completions/mean_terminated_length": 515.101806640625, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 9.597084548104956, "grad_norm": 0.18646889925003052, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 571781144.0, "reward": 0.6886160969734192, "reward_std": 0.08683561533689499, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1027 }, { "clip_ratio/high_max": 0.0016353268656530418, "clip_ratio/high_mean": 0.0005685481555701699, "clip_ratio/low_mean": 0.0005310405799718865, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010995887278113514, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2253.0, "completions/mean_length": 976.1060791015625, "completions/mean_terminated_length": 553.0025024414062, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 9.606413994169095, "grad_norm": 0.25454413890838623, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 572311895.0, "reward": 0.6350446939468384, "reward_std": 0.11892351508140564, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1028 }, { "clip_ratio/high_max": 0.0025896687766362447, "clip_ratio/high_mean": 0.0008289245524792932, "clip_ratio/low_mean": 0.000477146599223488, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013060711607977282, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3626.0, "completions/mean_length": 844.107177734375, "completions/mean_terminated_length": 516.5208740234375, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 9.615743440233237, "grad_norm": 0.2483367770910263, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 572827111.0, "reward": 0.6540178656578064, "reward_std": 0.14229939877986908, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1029 }, { "clip_ratio/high_max": 0.0017741328192641959, "clip_ratio/high_mean": 0.0006298645785136614, "clip_ratio/low_mean": 0.00039479202087022713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00102465658710571, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4030.0, "completions/mean_length": 833.6998291015625, "completions/mean_terminated_length": 505.0650939941406, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 9.625072886297376, "grad_norm": 0.2444959580898285, "learning_rate": 1e-06, "loss": -0.0477, "num_tokens": 573323586.0, "reward": 0.7020089626312256, "reward_std": 0.13373075425624847, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1030 }, { "clip_ratio/high_max": 0.0019861087275785394, "clip_ratio/high_mean": 0.0006300596960500116, "clip_ratio/low_mean": 0.0004409523812682892, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010710120586736593, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3505.0, "completions/mean_length": 940.4420166015625, "completions/mean_terminated_length": 535.0679931640625, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 9.634402332361516, "grad_norm": 0.2334720492362976, "learning_rate": 1e-06, "loss": -0.0682, "num_tokens": 573838094.0, "reward": 0.6283482313156128, "reward_std": 0.13989083468914032, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 1031 }, { "clip_ratio/high_max": 0.0019386091371416114, "clip_ratio/high_mean": 0.0007693026418564841, "clip_ratio/low_mean": 0.00048742640728960396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012567290759761818, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 1007.23779296875, "completions/mean_terminated_length": 534.1840209960938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 9.643731778425655, "grad_norm": 0.2592686712741852, "learning_rate": 1e-06, "loss": -0.0581, "num_tokens": 574347395.0, "reward": 0.6350446939468384, "reward_std": 0.16180318593978882, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1032 }, { "clip_ratio/high_max": 0.0020356640306999907, "clip_ratio/high_mean": 0.0006960072187212063, "clip_ratio/low_mean": 0.0004890329191766796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011850401460833382, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2936.0, "completions/mean_length": 911.08935546875, "completions/mean_terminated_length": 515.4730224609375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 9.653061224489797, "grad_norm": 0.326799213886261, "learning_rate": 1e-06, "loss": -0.0304, "num_tokens": 574842171.0, "reward": 0.65625, "reward_std": 0.12971457839012146, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1033 }, { "clip_ratio/high_max": 0.0018600934199639596, "clip_ratio/high_mean": 0.000609039145274437, "clip_ratio/low_mean": 0.0002478077349223895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008568468692828901, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 1869.0, "completions/mean_length": 975.6328735351562, "completions/mean_terminated_length": 529.8660888671875, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 9.662390670553936, "grad_norm": 0.2780069410800934, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 575347210.0, "reward": 0.6640625, "reward_std": 0.12602722644805908, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1034 }, { "clip_ratio/high_max": 0.0019211800281482283, "clip_ratio/high_mean": 0.0006911874461366097, "clip_ratio/low_mean": 0.0005047573258707416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011959447656408884, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3855.0, "completions/mean_length": 869.3192138671875, "completions/mean_terminated_length": 517.8984985351562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.671720116618076, "grad_norm": 0.2505824565887451, "learning_rate": 1e-06, "loss": -0.0279, "num_tokens": 575850784.0, "reward": 0.637276828289032, "reward_std": 0.14011836051940918, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1035 }, { "clip_ratio/high_max": 0.002050023802439682, "clip_ratio/high_mean": 0.0006352996188070392, "clip_ratio/low_mean": 0.0004639841963580693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010992838651873171, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 899.2489013671875, "completions/mean_terminated_length": 524.5673217773438, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 9.681049562682215, "grad_norm": 0.2341313511133194, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 576360479.0, "reward": 0.6383928656578064, "reward_std": 0.14800554513931274, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1036 }, { "clip_ratio/high_max": 0.0021974491100991145, "clip_ratio/high_mean": 0.00077992530168558, "clip_ratio/low_mean": 0.00038363828616638784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001163563625595998, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3857.0, "completions/mean_length": 977.1641235351562, "completions/mean_terminated_length": 563.1593017578125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 9.690379008746355, "grad_norm": 0.26642727851867676, "learning_rate": 1e-06, "loss": -0.0462, "num_tokens": 576885210.0, "reward": 0.6495535969734192, "reward_std": 0.1529289036989212, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1037 }, { "clip_ratio/high_max": 0.0017975972768908832, "clip_ratio/high_mean": 0.0007029525049802032, "clip_ratio/low_mean": 0.0003928080700461578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001095760573662119, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2652.0, "completions/mean_length": 942.3638916015625, "completions/mean_terminated_length": 563.927490234375, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 9.699708454810496, "grad_norm": 0.4081231951713562, "learning_rate": 1e-06, "loss": -0.0574, "num_tokens": 577420016.0, "reward": 0.6339285969734192, "reward_std": 0.1612718552350998, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199838399887085, "step": 1038 }, { "clip_ratio/high_max": 0.001977262072614394, "clip_ratio/high_mean": 0.0006768327257304918, "clip_ratio/low_mean": 0.0005148207956153783, "clip_ratio/low_min": 1.3133010725141503e-05, "clip_ratio/region_mean": 0.0011916535222553648, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4071.0, "completions/mean_length": 833.1574096679688, "completions/mean_terminated_length": 517.6560668945312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 9.709037900874636, "grad_norm": 0.2300080955028534, "learning_rate": 1e-06, "loss": -0.0546, "num_tokens": 577932253.0, "reward": 0.6383928656578064, "reward_std": 0.14117145538330078, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1039 }, { "clip_ratio/high_max": 0.002007761017011944, "clip_ratio/high_mean": 0.0007245595497806789, "clip_ratio/low_mean": 0.00047620994223507296, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001200769496790599, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1806.0, "completions/mean_length": 992.5123291015625, "completions/mean_terminated_length": 512.5914916992188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 9.718367346938775, "grad_norm": 0.3068905472755432, "learning_rate": 1e-06, "loss": -0.07, "num_tokens": 578415184.0, "reward": 0.6171875, "reward_std": 0.13873080909252167, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1040 }, { "clip_ratio/high_max": 0.0017484277159383055, "clip_ratio/high_mean": 0.0006192948048919789, "clip_ratio/low_mean": 0.00036504063518805197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009843354600889143, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3675.0, "completions/mean_length": 846.3895263671875, "completions/mean_terminated_length": 483.52978515625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 9.727696793002915, "grad_norm": 285.1891784667969, "learning_rate": 1e-06, "loss": 4276.3335, "num_tokens": 578895469.0, "reward": 0.699776828289032, "reward_std": 0.13534586131572723, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1041 }, { "clip_ratio/high_max": 0.0017575875244801864, "clip_ratio/high_mean": 0.0006908914892846951, "clip_ratio/low_mean": 0.0003417383965143017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010326299016014673, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2526.0, "completions/mean_length": 823.8717041015625, "completions/mean_terminated_length": 511.85943603515625, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 9.737026239067056, "grad_norm": 0.27488529682159424, "learning_rate": 1e-06, "loss": -0.0782, "num_tokens": 579396554.0, "reward": 0.7053571939468384, "reward_std": 0.15097612142562866, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 1042 }, { "clip_ratio/high_max": 0.0018153538767364807, "clip_ratio/high_mean": 0.0006185945339893806, "clip_ratio/low_mean": 0.0003676805572467856, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009862750848697033, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3552.0, "completions/mean_length": 974.7020263671875, "completions/mean_terminated_length": 524.2464599609375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 9.746355685131196, "grad_norm": 0.21488317847251892, "learning_rate": 1e-06, "loss": -0.0482, "num_tokens": 579893967.0, "reward": 0.637276828289032, "reward_std": 0.13380491733551025, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1043 }, { "clip_ratio/high_max": 0.0021967298671370372, "clip_ratio/high_mean": 0.0008200539982681221, "clip_ratio/low_mean": 0.0004588539322867291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001278907944652019, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2737.0, "completions/mean_length": 878.49560546875, "completions/mean_terminated_length": 558.718994140625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 9.755685131195335, "grad_norm": 0.2162492871284485, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 580435051.0, "reward": 0.6863839626312256, "reward_std": 0.1738969385623932, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 1044 }, { "clip_ratio/high_max": 0.001430839121894678, "clip_ratio/high_mean": 0.0005467789214890217, "clip_ratio/low_mean": 0.0002811250105878571, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008279039466287941, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3511.0, "completions/mean_length": 915.7645263671875, "completions/mean_terminated_length": 502.69482421875, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 9.765014577259475, "grad_norm": 0.33657416701316833, "learning_rate": 1e-06, "loss": -0.0416, "num_tokens": 580908656.0, "reward": 0.6830357313156128, "reward_std": 0.12523627281188965, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1045 }, { "clip_ratio/high_max": 0.001537170282972511, "clip_ratio/high_mean": 0.0005386366037782864, "clip_ratio/low_mean": 0.0003947968293687154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009334334226878127, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3805.0, "completions/mean_length": 1052.7210693359375, "completions/mean_terminated_length": 617.966796875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.774344023323614, "grad_norm": 0.2501138746738434, "learning_rate": 1e-06, "loss": -0.0503, "num_tokens": 581487438.0, "reward": 0.5970982313156128, "reward_std": 0.14368626475334167, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 1046 }, { "clip_ratio/high_max": 0.001727103088342119, "clip_ratio/high_mean": 0.000598639273448498, "clip_ratio/low_mean": 0.0003986700412497157, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009973093328881077, "completions/clipped_ratio": 0.1573660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2466.0, "completions/mean_length": 1109.1942138671875, "completions/mean_terminated_length": 551.3933715820312, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 9.783673469387756, "grad_norm": 0.2089390605688095, "learning_rate": 1e-06, "loss": -0.062, "num_tokens": 582001148.0, "reward": 0.5970982313156128, "reward_std": 0.13110283017158508, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 1047 }, { "clip_ratio/high_max": 0.0013950355314591434, "clip_ratio/high_mean": 0.0005012010387872579, "clip_ratio/low_mean": 0.00040065501434582984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009018560567710665, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3303.0, "completions/mean_length": 962.224365234375, "completions/mean_terminated_length": 555.1891479492188, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 9.793002915451895, "grad_norm": 0.20835205912590027, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 582534837.0, "reward": 0.6305803656578064, "reward_std": 0.10434380918741226, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1048 }, { "clip_ratio/high_max": 0.0016963560519798193, "clip_ratio/high_mean": 0.0006850205991213443, "clip_ratio/low_mean": 0.00036972204134144704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010547426354605705, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3177.0, "completions/mean_length": 896.427490234375, "completions/mean_terminated_length": 547.9591674804688, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 9.802332361516035, "grad_norm": 0.2542431354522705, "learning_rate": 1e-06, "loss": -0.0307, "num_tokens": 583065428.0, "reward": 0.625, "reward_std": 0.14147454500198364, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1049 }, { "clip_ratio/high_max": 0.0015421499847434461, "clip_ratio/high_mean": 0.0005322884244378656, "clip_ratio/low_mean": 0.0003362369741353177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008685253978910623, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 919.0402221679688, "completions/mean_terminated_length": 528.88720703125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 9.811661807580174, "grad_norm": 0.2551579773426056, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 583575320.0, "reward": 0.715401828289032, "reward_std": 0.10818270593881607, "rewards/verify_math_reward/mean": 0.7154017686843872, "rewards/verify_math_reward/std": 0.4514748752117157, "step": 1050 }, { "clip_ratio/high_max": 0.002346133565879427, "clip_ratio/high_mean": 0.0007620485121151432, "clip_ratio/low_mean": 0.00047810708201723173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012401555977703538, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3246.0, "completions/mean_length": 954.54248046875, "completions/mean_terminated_length": 519.448486328125, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 9.820991253644316, "grad_norm": 0.2352941483259201, "learning_rate": 1e-06, "loss": -0.0785, "num_tokens": 584064694.0, "reward": 0.6640625, "reward_std": 0.13989335298538208, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1051 }, { "clip_ratio/high_max": 0.0018258053314639255, "clip_ratio/high_mean": 0.000634061367236427, "clip_ratio/low_mean": 0.0002967467048620165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009308080552727915, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 1112.204345703125, "completions/mean_terminated_length": 564.321044921875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 9.830320699708455, "grad_norm": 0.1994365155696869, "learning_rate": 1e-06, "loss": -0.0779, "num_tokens": 584586133.0, "reward": 0.6171875, "reward_std": 0.126701220870018, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1052 }, { "clip_ratio/high_max": 0.001893172120617237, "clip_ratio/high_mean": 0.0007289749746632879, "clip_ratio/low_mean": 0.0004387708750073216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011677458642225247, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3258.0, "completions/mean_length": 984.044677734375, "completions/mean_terminated_length": 507.4388732910156, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 9.839650145772595, "grad_norm": 0.287319153547287, "learning_rate": 1e-06, "loss": -0.0876, "num_tokens": 585063253.0, "reward": 0.640625, "reward_std": 0.15353761613368988, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1053 }, { "clip_ratio/high_max": 0.001670144782110583, "clip_ratio/high_mean": 0.0005264264609650127, "clip_ratio/low_mean": 0.0002991365536217927, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008255630054918583, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3909.0, "completions/mean_length": 1017.6551513671875, "completions/mean_terminated_length": 555.3106689453125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 9.848979591836734, "grad_norm": 0.22084204852581024, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 585585896.0, "reward": 0.613839328289032, "reward_std": 0.1277529001235962, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1054 }, { "clip_ratio/high_max": 0.0020957619417458773, "clip_ratio/high_mean": 0.0008615199967607623, "clip_ratio/low_mean": 0.0003166778906233958, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001178197893750621, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 1949.0, "completions/mean_length": 947.6082763671875, "completions/mean_terminated_length": 529.68017578125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 9.858309037900874, "grad_norm": 0.24934609234333038, "learning_rate": 1e-06, "loss": -0.0474, "num_tokens": 586107025.0, "reward": 0.6350446939468384, "reward_std": 0.14316701889038086, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1055 }, { "clip_ratio/high_max": 0.0015364606842922512, "clip_ratio/high_mean": 0.0006110137910582125, "clip_ratio/low_mean": 0.0004378621529212978, "clip_ratio/low_min": 3.377717985131312e-05, "clip_ratio/region_mean": 0.0010488759216968901, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3396.0, "completions/mean_length": 1020.7489013671875, "completions/mean_terminated_length": 558.8690795898438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 9.867638483965015, "grad_norm": 0.8965012431144714, "learning_rate": 1e-06, "loss": -0.0467, "num_tokens": 586636592.0, "reward": 0.6015625, "reward_std": 0.15491561591625214, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 1056 }, { "clip_ratio/high_max": 0.0020013274006487336, "clip_ratio/high_mean": 0.0007237715963128721, "clip_ratio/low_mean": 0.0003563764864793484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010801480912050465, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1985.0, "completions/mean_length": 1004.0324096679688, "completions/mean_terminated_length": 521.2864990234375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 9.876967930029155, "grad_norm": 0.25626060366630554, "learning_rate": 1e-06, "loss": -0.0494, "num_tokens": 587127693.0, "reward": 0.6194196939468384, "reward_std": 0.13981668651103973, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1057 }, { "clip_ratio/high_max": 0.0024253955562016927, "clip_ratio/high_mean": 0.0009168129126919666, "clip_ratio/low_mean": 0.0003930366920030792, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013098496492602862, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 959.0770263671875, "completions/mean_terminated_length": 542.6713256835938, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 9.886297376093294, "grad_norm": 0.31020262837409973, "learning_rate": 1e-06, "loss": -0.0591, "num_tokens": 587650794.0, "reward": 0.652901828289032, "reward_std": 0.17055658996105194, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1058 }, { "clip_ratio/high_max": 0.001750830779201351, "clip_ratio/high_mean": 0.0005940463465776702, "clip_ratio/low_mean": 0.0003966069361922564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009906532814056845, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 958.6585083007812, "completions/mean_terminated_length": 546.684326171875, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 9.895626822157434, "grad_norm": 0.3441726267337799, "learning_rate": 1e-06, "loss": -0.064, "num_tokens": 588170280.0, "reward": 0.6071428656578064, "reward_std": 0.15503397583961487, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1059 }, { "clip_ratio/high_max": 0.0015743103140266612, "clip_ratio/high_mean": 0.0005436696628748905, "clip_ratio/low_mean": 0.00041004702279678895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009537166752124904, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3171.0, "completions/mean_length": 880.1082763671875, "completions/mean_terminated_length": 525.443603515625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 9.904956268221575, "grad_norm": 0.2554823160171509, "learning_rate": 1e-06, "loss": -0.046, "num_tokens": 588680497.0, "reward": 0.6272321939468384, "reward_std": 0.1368863582611084, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111400604248, "step": 1060 }, { "clip_ratio/high_max": 0.0015315202963392949, "clip_ratio/high_mean": 0.0005128437583152845, "clip_ratio/low_mean": 0.0002542623176395864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007671060775464866, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2751.0, "completions/mean_length": 740.3248291015625, "completions/mean_terminated_length": 477.8471984863281, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 9.914285714285715, "grad_norm": 0.27816301584243774, "learning_rate": 1e-06, "loss": -0.0438, "num_tokens": 589158212.0, "reward": 0.7109375596046448, "reward_std": 0.09841014444828033, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 1061 }, { "clip_ratio/high_max": 0.0016542789126106072, "clip_ratio/high_mean": 0.0005652393183481763, "clip_ratio/low_mean": 0.0003445334227762942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009097727306652814, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 918.325927734375, "completions/mean_terminated_length": 523.6085205078125, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 9.923615160349854, "grad_norm": 0.30398622155189514, "learning_rate": 1e-06, "loss": -0.0484, "num_tokens": 589664384.0, "reward": 0.6953125596046448, "reward_std": 0.12140554934740067, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1062 }, { "clip_ratio/high_max": 0.002037021920841653, "clip_ratio/high_mean": 0.0006436750572902383, "clip_ratio/low_mean": 0.0003814122860603675, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010250873529003002, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2636.0, "completions/mean_length": 896.7879638671875, "completions/mean_terminated_length": 481.25347900390625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 9.932944606413994, "grad_norm": 0.306455135345459, "learning_rate": 1e-06, "loss": -0.0367, "num_tokens": 590141618.0, "reward": 0.6662946939468384, "reward_std": 0.14267736673355103, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1063 }, { "clip_ratio/high_max": 0.0013350238041311968, "clip_ratio/high_mean": 0.0003894673327522469, "clip_ratio/low_mean": 0.00020939956311849528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005988669072394259, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 987.0770263671875, "completions/mean_terminated_length": 596.5087890625, "completions/min_length": 192.0, "completions/min_terminated_length": 192.0, "epoch": 9.942274052478133, "grad_norm": 0.2687012255191803, "learning_rate": 1e-06, "loss": -0.0425, "num_tokens": 590696279.0, "reward": 0.6517857313156128, "reward_std": 0.10803116858005524, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1064 }, { "clip_ratio/high_max": 0.0015185796437435783, "clip_ratio/high_mean": 0.0005965304562778329, "clip_ratio/low_mean": 0.0004224512408654846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001018981718516443, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3534.0, "completions/mean_length": 981.26904296875, "completions/mean_terminated_length": 540.842041015625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 9.951603498542275, "grad_norm": 0.2318619042634964, "learning_rate": 1e-06, "loss": -0.0375, "num_tokens": 591212744.0, "reward": 0.637276828289032, "reward_std": 0.13365407288074493, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1065 }, { "clip_ratio/high_max": 0.0019497639950714074, "clip_ratio/high_mean": 0.0007552758052042918, "clip_ratio/low_mean": 0.00045476459308702033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012100404055672698, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4029.0, "completions/mean_length": 875.8092041015625, "completions/mean_terminated_length": 525.0952758789062, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 9.960932944606414, "grad_norm": 0.2525951862335205, "learning_rate": 1e-06, "loss": -0.04, "num_tokens": 591728197.0, "reward": 0.65625, "reward_std": 0.15361177921295166, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1066 }, { "clip_ratio/high_max": 0.002424847443762701, "clip_ratio/high_mean": 0.0008103852305794135, "clip_ratio/low_mean": 0.0005829717265442014, "clip_ratio/low_min": 1.3742303963226732e-05, "clip_ratio/region_mean": 0.001393356917105848, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4032.0, "completions/mean_length": 959.8973388671875, "completions/mean_terminated_length": 552.5598754882812, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 9.970262390670554, "grad_norm": 0.34032052755355835, "learning_rate": 1e-06, "loss": -0.0366, "num_tokens": 592259161.0, "reward": 0.6383928656578064, "reward_std": 0.15097863972187042, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 1067 }, { "clip_ratio/high_max": 0.0017289511451963335, "clip_ratio/high_mean": 0.0006448427611758234, "clip_ratio/low_mean": 0.00045808370032318635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011029264569515362, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2139.0, "completions/mean_length": 849.5435791015625, "completions/mean_terminated_length": 531.2634887695312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 9.979591836734693, "grad_norm": 0.20769155025482178, "learning_rate": 1e-06, "loss": -0.0622, "num_tokens": 592771616.0, "reward": 0.7031250596046448, "reward_std": 0.13996823132038116, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1068 }, { "clip_ratio/high_max": 0.0016924042320169974, "clip_ratio/high_mean": 0.0007165549150158768, "clip_ratio/low_mean": 0.00043740931596403243, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011539641964191105, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3103.0, "completions/mean_length": 1057.489990234375, "completions/mean_terminated_length": 551.0716552734375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 9.988921282798835, "grad_norm": 0.48560819029808044, "learning_rate": 1e-06, "loss": -0.047, "num_tokens": 593298927.0, "reward": 0.5948660969734192, "reward_std": 0.14324119687080383, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 1069 }, { "clip_ratio/high_max": 0.0022205749482964166, "clip_ratio/high_mean": 0.0007762669993098825, "clip_ratio/low_mean": 0.0003609112159210781, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011371782056812663, "completions/clipped_ratio": 0.15340909090909094, "completions/max_length": 4096.0, "completions/max_terminated_length": 2645.0, "completions/mean_length": 1100.4346923828125, "completions/mean_terminated_length": 557.6140747070312, "completions/min_length": 217.0, "completions/min_terminated_length": 217.0, "epoch": 9.998250728862974, "grad_norm": 0.3290969133377075, "learning_rate": 1e-06, "loss": -0.0801, "num_tokens": 593800012.0, "reward": 0.6383928656578064, "reward_std": 0.1505221724510193, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1070 }, { "clip_ratio/high_max": 0.0019547574120224454, "clip_ratio/high_mean": 0.0006853751092421589, "clip_ratio/low_mean": 0.00044320657161733834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011285816799500026, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 979.6172485351562, "completions/mean_terminated_length": 534.4196166992188, "completions/min_length": 179.0, "completions/min_terminated_length": 179.0, "epoch": 10.00932944606414, "grad_norm": 0.33982887864112854, "learning_rate": 1e-06, "loss": -0.0397, "num_tokens": 594311501.0, "reward": 0.6350446939468384, "reward_std": 0.12790445983409882, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1071 }, { "clip_ratio/high_max": 0.0016944085218710825, "clip_ratio/high_mean": 0.000578192245484388, "clip_ratio/low_mean": 0.000361381742550293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009395739762112498, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3844.0, "completions/mean_length": 975.099365234375, "completions/mean_terminated_length": 574.1775512695312, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 10.018658892128279, "grad_norm": 0.18213008344173431, "learning_rate": 1e-06, "loss": -0.0564, "num_tokens": 594860806.0, "reward": 0.621651828289032, "reward_std": 0.13233955204486847, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 1072 }, { "clip_ratio/high_max": 0.0018270213731739204, "clip_ratio/high_mean": 0.0006081969531805953, "clip_ratio/low_mean": 0.0003907687978426111, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009989657373807859, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2686.0, "completions/mean_length": 954.239990234375, "completions/mean_terminated_length": 528.1710815429688, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.02798833819242, "grad_norm": 0.261699378490448, "learning_rate": 1e-06, "loss": -0.0499, "num_tokens": 595368461.0, "reward": 0.6149553656578064, "reward_std": 0.1317012757062912, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1073 }, { "clip_ratio/high_max": 0.0021941313025308773, "clip_ratio/high_mean": 0.0007479084279111703, "clip_ratio/low_mean": 0.0003567929979908513, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001104701434087474, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2046.0, "completions/mean_length": 931.4576416015625, "completions/mean_terminated_length": 511.3856201171875, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 10.03731778425656, "grad_norm": 0.36655399203300476, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 595858919.0, "reward": 0.65625, "reward_std": 0.13549739122390747, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1074 }, { "clip_ratio/high_max": 0.0018448381779307965, "clip_ratio/high_mean": 0.000697012113960227, "clip_ratio/low_mean": 0.0005023614039600943, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001199373546114657, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3958.0, "completions/mean_length": 961.7645263671875, "completions/mean_terminated_length": 563.57861328125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 10.0466472303207, "grad_norm": 0.5529072880744934, "learning_rate": 1e-06, "loss": -0.032, "num_tokens": 596402396.0, "reward": 0.629464328289032, "reward_std": 0.1545158475637436, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 1075 }, { "clip_ratio/high_max": 0.0011781354987761006, "clip_ratio/high_mean": 0.0003795152715611039, "clip_ratio/low_mean": 0.0003574213467345544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007369366157945478, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3359.0, "completions/mean_length": 882.857177734375, "completions/mean_terminated_length": 532.910888671875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.055976676384839, "grad_norm": 0.1720803827047348, "learning_rate": 1e-06, "loss": -0.0373, "num_tokens": 596909564.0, "reward": 0.7209821939468384, "reward_std": 0.10836746543645859, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1076 }, { "clip_ratio/high_max": 0.0018365723990427796, "clip_ratio/high_mean": 0.0005781471554655582, "clip_ratio/low_mean": 0.000381889202799357, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009600363628123887, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3820.0, "completions/mean_length": 917.3069458007812, "completions/mean_terminated_length": 540.308349609375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 10.06530612244898, "grad_norm": 0.22136181592941284, "learning_rate": 1e-06, "loss": -0.0448, "num_tokens": 597434695.0, "reward": 0.6707589626312256, "reward_std": 0.11945415288209915, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1077 }, { "clip_ratio/high_max": 0.0018467046065779869, "clip_ratio/high_mean": 0.0006715934341627872, "clip_ratio/low_mean": 0.00040892525566960103, "clip_ratio/low_min": 1.7944301362149417e-05, "clip_ratio/region_mean": 0.001080518692106125, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3786.0, "completions/mean_length": 965.060302734375, "completions/mean_terminated_length": 589.3474731445312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 10.07463556851312, "grad_norm": 0.24332016706466675, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 597993653.0, "reward": 0.6629464626312256, "reward_std": 0.15751849114894867, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1078 }, { "clip_ratio/high_max": 0.002218414934759494, "clip_ratio/high_mean": 0.0006293981896305922, "clip_ratio/low_mean": 0.00042385161577840336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010532498054089956, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 974.58935546875, "completions/mean_terminated_length": 546.78173828125, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 10.08396501457726, "grad_norm": 0.23597940802574158, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 598527301.0, "reward": 0.6428571939468384, "reward_std": 0.1306481808423996, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 1079 }, { "clip_ratio/high_max": 0.0019827116848318838, "clip_ratio/high_mean": 0.000614834561929456, "clip_ratio/low_mean": 0.0004315246228543401, "clip_ratio/low_min": 1.72986437974032e-05, "clip_ratio/region_mean": 0.001046359198880964, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 1004.4029541015625, "completions/mean_terminated_length": 571.7366333007812, "completions/min_length": 177.0, "completions/min_terminated_length": 177.0, "epoch": 10.093294460641399, "grad_norm": 0.22727219760417938, "learning_rate": 1e-06, "loss": -0.049, "num_tokens": 599078558.0, "reward": 0.6551339626312256, "reward_std": 0.14266961812973022, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1080 }, { "clip_ratio/high_max": 0.0020448147115530446, "clip_ratio/high_mean": 0.0006679803354927571, "clip_ratio/low_mean": 0.00035781238693743944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010257927096972708, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1963.0, "completions/mean_length": 938.966552734375, "completions/mean_terminated_length": 506.2766418457031, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 10.102623906705539, "grad_norm": 0.3594035804271698, "learning_rate": 1e-06, "loss": -0.0771, "num_tokens": 599570168.0, "reward": 0.6417410969734192, "reward_std": 0.1418525129556656, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 1081 }, { "clip_ratio/high_max": 0.001946572694578208, "clip_ratio/high_mean": 0.0005750222899223445, "clip_ratio/low_mean": 0.0004999674974897061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010749898101494182, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2290.0, "completions/mean_length": 899.0803833007812, "completions/mean_terminated_length": 510.9687194824219, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 10.11195335276968, "grad_norm": 1.8174324035644531, "learning_rate": 1e-06, "loss": -0.0362, "num_tokens": 600073704.0, "reward": 0.6238839626312256, "reward_std": 0.13121160864830017, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.48468026518821716, "step": 1082 }, { "clip_ratio/high_max": 0.0021044298555352725, "clip_ratio/high_mean": 0.0007841647275199648, "clip_ratio/low_mean": 0.0005819887319375994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013661534867424052, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3108.0, "completions/mean_length": 990.1250610351562, "completions/mean_terminated_length": 573.3873291015625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 10.12128279883382, "grad_norm": 0.5238993763923645, "learning_rate": 1e-06, "loss": -0.0619, "num_tokens": 600626696.0, "reward": 0.6037946939468384, "reward_std": 0.17453886568546295, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1083 }, { "clip_ratio/high_max": 0.001777457473508548, "clip_ratio/high_mean": 0.0006237813104235101, "clip_ratio/low_mean": 0.00034005245925072813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000963833752393839, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2444.0, "completions/mean_length": 1014.5938110351562, "completions/mean_terminated_length": 560.8655395507812, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 10.130612244897959, "grad_norm": 0.3062381446361542, "learning_rate": 1e-06, "loss": -0.0414, "num_tokens": 601159172.0, "reward": 0.6127232313156128, "reward_std": 0.16194264590740204, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 1084 }, { "clip_ratio/high_max": 0.0016755195738369366, "clip_ratio/high_mean": 0.0005690128355126944, "clip_ratio/low_mean": 0.00031835315428452304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008873659971868619, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 938.6685791015625, "completions/mean_terminated_length": 564.2034912109375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 10.139941690962099, "grad_norm": 0.21554887294769287, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 601694747.0, "reward": 0.6395089626312256, "reward_std": 0.11678526550531387, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 1085 }, { "clip_ratio/high_max": 0.001962107911822386, "clip_ratio/high_mean": 0.0006806953333580168, "clip_ratio/low_mean": 0.00027986448321826174, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009605598188500153, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3661.0, "completions/mean_length": 845.0859985351562, "completions/mean_terminated_length": 491.0259704589844, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.14927113702624, "grad_norm": 0.25139451026916504, "learning_rate": 1e-06, "loss": -0.0555, "num_tokens": 602175048.0, "reward": 0.7377232313156128, "reward_std": 0.11832760274410248, "rewards/verify_math_reward/mean": 0.7377232313156128, "rewards/verify_math_reward/std": 0.4401180148124695, "step": 1086 }, { "clip_ratio/high_max": 0.0020427578729140805, "clip_ratio/high_mean": 0.000650840588605206, "clip_ratio/low_mean": 0.00035162167387170484, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010024622570199426, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 901.12060546875, "completions/mean_terminated_length": 526.6583862304688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 10.15860058309038, "grad_norm": 0.25762447714805603, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 602692516.0, "reward": 0.6852678656578064, "reward_std": 0.10096701979637146, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1087 }, { "clip_ratio/high_max": 0.0014616701155318879, "clip_ratio/high_mean": 0.0004872079198321444, "clip_ratio/low_mean": 0.0003279360041688051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008151439233188285, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 1087.3695068359375, "completions/mean_terminated_length": 567.5536499023438, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 10.167930029154519, "grad_norm": 0.25292402505874634, "learning_rate": 1e-06, "loss": -0.0597, "num_tokens": 603211151.0, "reward": 0.6361607313156128, "reward_std": 0.12399842590093613, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1088 }, { "clip_ratio/high_max": 0.001685253420873778, "clip_ratio/high_mean": 0.000670446495860233, "clip_ratio/low_mean": 0.0003939537546102656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010644002595654456, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3662.0, "completions/mean_length": 1110.7210693359375, "completions/mean_terminated_length": 581.137939453125, "completions/min_length": 191.0, "completions/min_terminated_length": 191.0, "epoch": 10.177259475218658, "grad_norm": 0.2215045541524887, "learning_rate": 1e-06, "loss": -0.0229, "num_tokens": 603744957.0, "reward": 0.6417410969734192, "reward_std": 0.11768680810928345, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1089 }, { "clip_ratio/high_max": 0.00181549544504378, "clip_ratio/high_mean": 0.0006686062297376338, "clip_ratio/low_mean": 0.00045631784269062337, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011249240524193738, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4071.0, "completions/mean_length": 790.1361694335938, "completions/mean_terminated_length": 492.5279846191406, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 10.186588921282798, "grad_norm": 0.32556310296058655, "learning_rate": 1e-06, "loss": -0.0486, "num_tokens": 604235239.0, "reward": 0.7209821939468384, "reward_std": 0.14537875354290009, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1090 }, { "clip_ratio/high_max": 0.001611052437510807, "clip_ratio/high_mean": 0.0005533401954380679, "clip_ratio/low_mean": 0.00036057048328075325, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009139106950897258, "completions/clipped_ratio": 0.0758928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2410.0, "completions/mean_length": 767.6082763671875, "completions/mean_terminated_length": 494.2620849609375, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 10.19591836734694, "grad_norm": 0.27630820870399475, "learning_rate": 1e-06, "loss": -0.0482, "num_tokens": 604728640.0, "reward": 0.7087053656578064, "reward_std": 0.1345965713262558, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1091 }, { "clip_ratio/high_max": 0.001777386132744141, "clip_ratio/high_mean": 0.0006654456246906193, "clip_ratio/low_mean": 0.0003801905838827224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001045636188791832, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3302.0, "completions/mean_length": 1055.607177734375, "completions/mean_terminated_length": 553.4876708984375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.205247813411079, "grad_norm": 0.26237422227859497, "learning_rate": 1e-06, "loss": -0.0309, "num_tokens": 605246168.0, "reward": 0.6004464626312256, "reward_std": 0.1293707937002182, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1092 }, { "clip_ratio/high_max": 0.0017463438598497305, "clip_ratio/high_mean": 0.0005789542519778479, "clip_ratio/low_mean": 0.0003651085903584317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009440628382435534, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3867.0, "completions/mean_length": 997.7902221679688, "completions/mean_terminated_length": 550.6666259765625, "completions/min_length": 185.0, "completions/min_terminated_length": 185.0, "epoch": 10.214577259475218, "grad_norm": 0.6341986060142517, "learning_rate": 1e-06, "loss": -0.0337, "num_tokens": 605760532.0, "reward": 0.625, "reward_std": 0.12325026839971542, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1093 }, { "clip_ratio/high_max": 0.0017340375889034476, "clip_ratio/high_mean": 0.000649502487249265, "clip_ratio/low_mean": 0.00038692057205480523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010364230583945755, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 1056.0457763671875, "completions/mean_terminated_length": 544.7626953125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 10.223906705539358, "grad_norm": 0.4272231161594391, "learning_rate": 1e-06, "loss": -0.0425, "num_tokens": 606274949.0, "reward": 0.6149553656578064, "reward_std": 0.16096946597099304, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1094 }, { "clip_ratio/high_max": 0.002043367341684643, "clip_ratio/high_mean": 0.0007410595953842858, "clip_ratio/low_mean": 0.0004182979655524832, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011593575509323273, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 938.2645263671875, "completions/mean_terminated_length": 532.61083984375, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 10.2332361516035, "grad_norm": 0.22444264590740204, "learning_rate": 1e-06, "loss": -0.0266, "num_tokens": 606781818.0, "reward": 0.6194196939468384, "reward_std": 0.14793068170547485, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1095 }, { "clip_ratio/high_max": 0.001567937772051664, "clip_ratio/high_mean": 0.0005112565959279891, "clip_ratio/low_mean": 0.0002891954236474703, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008004520204849541, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3562.0, "completions/mean_length": 1068.3638916015625, "completions/mean_terminated_length": 559.1525268554688, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.242565597667639, "grad_norm": 0.22511501610279083, "learning_rate": 1e-06, "loss": -0.0795, "num_tokens": 607301448.0, "reward": 0.65625, "reward_std": 0.12707918882369995, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1096 }, { "clip_ratio/high_max": 0.0018530640354583738, "clip_ratio/high_mean": 0.0007109063044481445, "clip_ratio/low_mean": 0.00029942098353785696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001010327274343581, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1970.0, "completions/mean_length": 876.0960083007812, "completions/mean_terminated_length": 498.7007751464844, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 10.251895043731778, "grad_norm": 0.2433689534664154, "learning_rate": 1e-06, "loss": -0.0549, "num_tokens": 607787886.0, "reward": 0.691964328289032, "reward_std": 0.12204564362764359, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1097 }, { "clip_ratio/high_max": 0.001672958787821699, "clip_ratio/high_mean": 0.0005305882577886223, "clip_ratio/low_mean": 0.000292459055344807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008230473013099981, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2153.0, "completions/mean_length": 1066.1351318359375, "completions/mean_terminated_length": 551.92822265625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 10.261224489795918, "grad_norm": 0.21952423453330994, "learning_rate": 1e-06, "loss": -0.0882, "num_tokens": 608296831.0, "reward": 0.6651785969734192, "reward_std": 0.12486424297094345, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 1098 }, { "clip_ratio/high_max": 0.0017506926433270564, "clip_ratio/high_mean": 0.0005558587704399542, "clip_ratio/low_mean": 0.00022451321137850755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007803719736330095, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2918.0, "completions/mean_length": 973.5123291015625, "completions/mean_terminated_length": 509.142333984375, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 10.270553935860057, "grad_norm": 0.22734098136425018, "learning_rate": 1e-06, "loss": -0.0579, "num_tokens": 608787282.0, "reward": 0.6540178656578064, "reward_std": 0.13109326362609863, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1099 }, { "clip_ratio/high_max": 0.0020766484158230014, "clip_ratio/high_mean": 0.0008685218981554499, "clip_ratio/low_mean": 0.0005024680372116563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013709899249079172, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3960.0, "completions/mean_length": 1012.6574096679688, "completions/mean_terminated_length": 554.1090087890625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 10.279883381924199, "grad_norm": 0.32426655292510986, "learning_rate": 1e-06, "loss": -0.0795, "num_tokens": 609320943.0, "reward": 0.5915178656578064, "reward_std": 0.17569930851459503, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1100 }, { "clip_ratio/high_max": 0.0015093450565473177, "clip_ratio/high_mean": 0.0005851605619682232, "clip_ratio/low_mean": 0.00048782230987853836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001072982864570804, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3776.0, "completions/mean_length": 854.3370971679688, "completions/mean_terminated_length": 532.1594848632812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.289212827988338, "grad_norm": 0.2743111848831177, "learning_rate": 1e-06, "loss": -0.0262, "num_tokens": 609844589.0, "reward": 0.6383928656578064, "reward_std": 0.12869539856910706, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1101 }, { "clip_ratio/high_max": 0.0015608931244059931, "clip_ratio/high_mean": 0.0006012780695527908, "clip_ratio/low_mean": 0.0002929804020368465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008942584863689262, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2797.0, "completions/mean_length": 967.1551513671875, "completions/mean_terminated_length": 542.8377685546875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.298542274052478, "grad_norm": 0.28705301880836487, "learning_rate": 1e-06, "loss": -0.0553, "num_tokens": 610365536.0, "reward": 0.6506696939468384, "reward_std": 0.12377132475376129, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1102 }, { "clip_ratio/high_max": 0.0018560715179773979, "clip_ratio/high_mean": 0.0007170960343501065, "clip_ratio/low_mean": 0.0004387990820760024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011558951082406566, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3475.0, "completions/mean_length": 1000.857177734375, "completions/mean_terminated_length": 540.5538330078125, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 10.307871720116617, "grad_norm": 0.7247589230537415, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 610880184.0, "reward": 0.6506696939468384, "reward_std": 0.15488240122795105, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1103 }, { "clip_ratio/high_max": 0.0018103200854966417, "clip_ratio/high_mean": 0.0005872240326425526, "clip_ratio/low_mean": 0.0003009614656548365, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000888185486473958, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2950.0, "completions/mean_length": 1025.8070068359375, "completions/mean_terminated_length": 573.7298583984375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 10.317201166180759, "grad_norm": 0.27747467160224915, "learning_rate": 1e-06, "loss": -0.0454, "num_tokens": 611412611.0, "reward": 0.6205357313156128, "reward_std": 0.11817465722560883, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 1104 }, { "clip_ratio/high_max": 0.0020701095745607745, "clip_ratio/high_mean": 0.0007507752070523566, "clip_ratio/low_mean": 0.0003860654078380321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011368406121619046, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3113.0, "completions/mean_length": 925.513427734375, "completions/mean_terminated_length": 553.9102172851562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 10.326530612244898, "grad_norm": 0.33168232440948486, "learning_rate": 1e-06, "loss": -0.0583, "num_tokens": 611941535.0, "reward": 0.65625, "reward_std": 0.16686922311782837, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1105 }, { "clip_ratio/high_max": 0.0017028767106239684, "clip_ratio/high_mean": 0.0007002319762250409, "clip_ratio/low_mean": 0.000340368744218722, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010406007277197205, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3763.0, "completions/mean_length": 980.3839721679688, "completions/mean_terminated_length": 553.3705444335938, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 10.335860058309038, "grad_norm": 0.18526038527488708, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 612462367.0, "reward": 0.625, "reward_std": 0.14011907577514648, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1106 }, { "clip_ratio/high_max": 0.0014553765322489198, "clip_ratio/high_mean": 0.0005131906091264682, "clip_ratio/low_mean": 0.00034756784725686884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000860758464114042, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3041.0, "completions/mean_length": 748.4185791015625, "completions/mean_terminated_length": 495.2400817871094, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 10.345189504373177, "grad_norm": 0.3814004957675934, "learning_rate": 1e-06, "loss": -0.0353, "num_tokens": 612961486.0, "reward": 0.6875000596046448, "reward_std": 0.11032137274742126, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1107 }, { "clip_ratio/high_max": 0.001951545764313778, "clip_ratio/high_mean": 0.0006905036000262044, "clip_ratio/low_mean": 0.0004273316717444686, "clip_ratio/low_min": 1.80792594619561e-05, "clip_ratio/region_mean": 0.0011178352779097622, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2128.0, "completions/mean_length": 947.2701416015625, "completions/mean_terminated_length": 506.608154296875, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 10.354518950437317, "grad_norm": 3.8275399208068848, "learning_rate": 1e-06, "loss": -0.0547, "num_tokens": 613443592.0, "reward": 0.652901828289032, "reward_std": 0.1478540003299713, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1108 }, { "clip_ratio/high_max": 0.001640619975660229, "clip_ratio/high_mean": 0.0005905162179260515, "clip_ratio/low_mean": 0.00019823863942747266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007887548681537737, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2430.0, "completions/mean_length": 833.5670166015625, "completions/mean_terminated_length": 500.5018310546875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 10.363848396501458, "grad_norm": 0.47877609729766846, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 613937892.0, "reward": 0.7299107313156128, "reward_std": 0.09724828600883484, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1109 }, { "clip_ratio/high_max": 0.0015125883110158611, "clip_ratio/high_mean": 0.0005202195807214594, "clip_ratio/low_mean": 0.0003805459919021814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009007655899040401, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 1018.919677734375, "completions/mean_terminated_length": 547.6550903320312, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 10.373177842565598, "grad_norm": 0.2592847943305969, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 614448516.0, "reward": 0.6227678656578064, "reward_std": 0.12711350619792938, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644601345062, "step": 1110 }, { "clip_ratio/high_max": 0.0018024676373897819, "clip_ratio/high_mean": 0.0006374157117079449, "clip_ratio/low_mean": 0.00029162323539821955, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009290389598390902, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 1014.2031860351562, "completions/mean_terminated_length": 537.6365966796875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 10.382507288629737, "grad_norm": 0.2130023092031479, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 614939474.0, "reward": 0.6696428656578064, "reward_std": 0.12125540524721146, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1111 }, { "clip_ratio/high_max": 0.0017755914850567933, "clip_ratio/high_mean": 0.0006347213902699878, "clip_ratio/low_mean": 0.00036021375035488745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000994935144262854, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3961.0, "completions/mean_length": 930.060302734375, "completions/mean_terminated_length": 523.3526000976562, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 10.391836734693877, "grad_norm": 0.2003924697637558, "learning_rate": 1e-06, "loss": -0.0563, "num_tokens": 615440536.0, "reward": 0.7031250596046448, "reward_std": 0.12978127598762512, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1112 }, { "clip_ratio/high_max": 0.002548636020947015, "clip_ratio/high_mean": 0.000742576129596273, "clip_ratio/low_mean": 0.0003879735222653835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011305496627755929, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2619.0, "completions/mean_length": 1151.227783203125, "completions/mean_terminated_length": 582.6630859375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 10.401166180758018, "grad_norm": 0.23142004013061523, "learning_rate": 1e-06, "loss": -0.0584, "num_tokens": 615977868.0, "reward": 0.5848214626312256, "reward_std": 0.12005076557397842, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1113 }, { "clip_ratio/high_max": 0.0020881131422356702, "clip_ratio/high_mean": 0.0007140845464164158, "clip_ratio/low_mean": 0.00042816682525881333, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011422513744037133, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2029.0, "completions/mean_length": 878.8761596679688, "completions/mean_terminated_length": 506.2826843261719, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 10.410495626822158, "grad_norm": 0.25440171360969543, "learning_rate": 1e-06, "loss": -0.08, "num_tokens": 616478781.0, "reward": 0.691964328289032, "reward_std": 0.15477293729782104, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1114 }, { "clip_ratio/high_max": 0.0019092388392891735, "clip_ratio/high_mean": 0.0006588499018107541, "clip_ratio/low_mean": 0.0002634126562952588, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009222625667462125, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3362.0, "completions/mean_length": 983.9922485351562, "completions/mean_terminated_length": 566.431640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 10.419825072886297, "grad_norm": 1.254032015800476, "learning_rate": 1e-06, "loss": -0.0607, "num_tokens": 617005558.0, "reward": 0.6718750596046448, "reward_std": 0.13696163892745972, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1115 }, { "clip_ratio/high_max": 0.0014197824675648008, "clip_ratio/high_mean": 0.0005624952509606373, "clip_ratio/low_mean": 0.0003861075886106846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009486028357059695, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2565.0, "completions/mean_length": 1041.786865234375, "completions/mean_terminated_length": 542.0064697265625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 10.429154518950437, "grad_norm": 0.27578651905059814, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 617515159.0, "reward": 0.6272321939468384, "reward_std": 0.13436834514141083, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1116 }, { "clip_ratio/high_max": 0.0018176795165345538, "clip_ratio/high_mean": 0.0006974294037718209, "clip_ratio/low_mean": 0.0004957339324391796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011931633416679688, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 860.3995971679688, "completions/mean_terminated_length": 543.183837890625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.438483965014576, "grad_norm": 12.257213592529297, "learning_rate": 1e-06, "loss": -0.0364, "num_tokens": 618038285.0, "reward": 0.6852678656578064, "reward_std": 0.15059886872768402, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1117 }, { "clip_ratio/high_max": 0.0021207968675298616, "clip_ratio/high_mean": 0.0008221366988436785, "clip_ratio/low_mean": 0.0002894268523050414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001111563527956605, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2600.0, "completions/mean_length": 1019.6563110351562, "completions/mean_terminated_length": 534.7545166015625, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 10.447813411078718, "grad_norm": 0.24080021679401398, "learning_rate": 1e-06, "loss": -0.0816, "num_tokens": 618543457.0, "reward": 0.645089328289032, "reward_std": 0.14800554513931274, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1118 }, { "clip_ratio/high_max": 0.0016128017959999852, "clip_ratio/high_mean": 0.00048695453187974636, "clip_ratio/low_mean": 0.0004715990726253949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009585536354279611, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 1929.0, "completions/mean_length": 1042.7734375, "completions/mean_terminated_length": 552.3587646484375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.457142857142857, "grad_norm": 0.4047533869743347, "learning_rate": 1e-06, "loss": -0.0117, "num_tokens": 619063030.0, "reward": 0.5970982313156128, "reward_std": 0.11321737617254257, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.4907552897930145, "step": 1119 }, { "clip_ratio/high_max": 0.0019372772803762928, "clip_ratio/high_mean": 0.0008348385472345399, "clip_ratio/low_mean": 0.0002843746135567926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011192131423740648, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2557.0, "completions/mean_length": 1025.466552734375, "completions/mean_terminated_length": 518.3693237304688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 10.466472303206997, "grad_norm": 0.33380305767059326, "learning_rate": 1e-06, "loss": -0.0881, "num_tokens": 619553032.0, "reward": 0.6640625, "reward_std": 0.16371390223503113, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1120 }, { "clip_ratio/high_max": 0.0017307962280028732, "clip_ratio/high_mean": 0.000534700634489127, "clip_ratio/low_mean": 0.0005774096780442051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011121103161713108, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3031.0, "completions/mean_length": 1008.3438110351562, "completions/mean_terminated_length": 549.1538696289062, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.475801749271136, "grad_norm": 1.0805732011795044, "learning_rate": 1e-06, "loss": -0.0313, "num_tokens": 620072348.0, "reward": 0.606026828289032, "reward_std": 0.1387321949005127, "rewards/verify_math_reward/mean": 0.6060267686843872, "rewards/verify_math_reward/std": 0.48890194296836853, "step": 1121 }, { "clip_ratio/high_max": 0.0016814146165415877, "clip_ratio/high_mean": 0.0006143256759969518, "clip_ratio/low_mean": 0.0004442820040821971, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010586076587060234, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 1927.0, "completions/mean_length": 881.3426513671875, "completions/mean_terminated_length": 495.583740234375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 10.485131195335278, "grad_norm": 1.1055032014846802, "learning_rate": 1e-06, "loss": -0.0291, "num_tokens": 620549447.0, "reward": 0.6774553656578064, "reward_std": 0.12392497807741165, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1122 }, { "clip_ratio/high_max": 0.0016898810972634237, "clip_ratio/high_mean": 0.0006642912476308993, "clip_ratio/low_mean": 0.00029380264049905236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009580938731232891, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 935.8672485351562, "completions/mean_terminated_length": 547.7807006835938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 10.494460641399417, "grad_norm": 0.21529246866703033, "learning_rate": 1e-06, "loss": -0.0423, "num_tokens": 621070672.0, "reward": 0.645089328289032, "reward_std": 0.13260099291801453, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1123 }, { "clip_ratio/high_max": 0.0013960718279122375, "clip_ratio/high_mean": 0.000468820533114922, "clip_ratio/low_mean": 0.00036943050645277253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008382510241062846, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3974.0, "completions/mean_length": 1006.9620971679688, "completions/mean_terminated_length": 520.0594482421875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 10.503790087463557, "grad_norm": 0.2957065999507904, "learning_rate": 1e-06, "loss": -0.0126, "num_tokens": 621559102.0, "reward": 0.6383928656578064, "reward_std": 0.1060783639550209, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 1124 }, { "clip_ratio/high_max": 0.0018658112785487901, "clip_ratio/high_mean": 0.0006119315316936991, "clip_ratio/low_mean": 0.00037690872568418854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009888402819342446, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2599.0, "completions/mean_length": 899.810302734375, "completions/mean_terminated_length": 484.6683349609375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 10.513119533527696, "grad_norm": 0.30778059363365173, "learning_rate": 1e-06, "loss": -0.057, "num_tokens": 622023668.0, "reward": 0.6662946939468384, "reward_std": 0.11937858164310455, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1125 }, { "clip_ratio/high_max": 0.0014257887396524893, "clip_ratio/high_mean": 0.00043771538548753597, "clip_ratio/low_mean": 0.00021784382488476695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006555592099175556, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2198.0, "completions/mean_length": 919.7980346679688, "completions/mean_terminated_length": 484.48095703125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 10.522448979591836, "grad_norm": 0.21127763390541077, "learning_rate": 1e-06, "loss": -0.0179, "num_tokens": 622511223.0, "reward": 0.684151828289032, "reward_std": 0.10167493671178818, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1126 }, { "clip_ratio/high_max": 0.0018633555264386814, "clip_ratio/high_mean": 0.0006138461822047248, "clip_ratio/low_mean": 0.0003756702421924274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009895164184854366, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 1989.0, "completions/mean_length": 948.1585083007812, "completions/mean_terminated_length": 493.8722839355469, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 10.531778425655977, "grad_norm": 0.8563103079795837, "learning_rate": 1e-06, "loss": -0.0495, "num_tokens": 622989765.0, "reward": 0.6908482313156128, "reward_std": 0.11843496561050415, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1127 }, { "clip_ratio/high_max": 0.0018796679833030794, "clip_ratio/high_mean": 0.0007108889294613618, "clip_ratio/low_mean": 0.00045899574706709245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011698846719809808, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3387.0, "completions/mean_length": 919.7734985351562, "completions/mean_terminated_length": 493.59619140625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 10.541107871720117, "grad_norm": 0.2738456130027771, "learning_rate": 1e-06, "loss": -0.0508, "num_tokens": 623481194.0, "reward": 0.6506696939468384, "reward_std": 0.1384280025959015, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1128 }, { "clip_ratio/high_max": 0.00207661838066997, "clip_ratio/high_mean": 0.0006093209012760781, "clip_ratio/low_mean": 0.0004467806620596093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010561015842540655, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3407.0, "completions/mean_length": 917.294677734375, "completions/mean_terminated_length": 495.3426208496094, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.550437317784256, "grad_norm": 0.21665704250335693, "learning_rate": 1e-06, "loss": -0.0468, "num_tokens": 623961794.0, "reward": 0.6316964626312256, "reward_std": 0.12328347563743591, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1129 }, { "clip_ratio/high_max": 0.001891064501251094, "clip_ratio/high_mean": 0.0007400507220154395, "clip_ratio/low_mean": 0.0003224086344744137, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010624593396642013, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3923.0, "completions/mean_length": 892.1250610351562, "completions/mean_terminated_length": 547.5797119140625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 10.559766763848396, "grad_norm": 0.3111117482185364, "learning_rate": 1e-06, "loss": -0.0549, "num_tokens": 624491698.0, "reward": 0.7120535969734192, "reward_std": 0.13665854930877686, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530589282512665, "step": 1130 }, { "clip_ratio/high_max": 0.0017117916722781956, "clip_ratio/high_mean": 0.0006911886903253617, "clip_ratio/low_mean": 0.00034528584274085006, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010364745103288442, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1861.0, "completions/mean_length": 815.4642944335938, "completions/mean_terminated_length": 493.8431396484375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.569096209912537, "grad_norm": 0.38041985034942627, "learning_rate": 1e-06, "loss": -0.0459, "num_tokens": 624975138.0, "reward": 0.6774553656578064, "reward_std": 0.12974987924098969, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1131 }, { "clip_ratio/high_max": 0.002079599682474509, "clip_ratio/high_mean": 0.000777964811277343, "clip_ratio/low_mean": 0.0005423197117124801, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013202844784245826, "completions/clipped_ratio": 0.1741071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3252.0, "completions/mean_length": 1179.4320068359375, "completions/mean_terminated_length": 564.5878295898438, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 10.578425655976677, "grad_norm": 0.23912523686885834, "learning_rate": 1e-06, "loss": -0.1093, "num_tokens": 625486781.0, "reward": 0.598214328289032, "reward_std": 0.17626453936100006, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 1132 }, { "clip_ratio/high_max": 0.0020934834028594196, "clip_ratio/high_mean": 0.0007495282061427133, "clip_ratio/low_mean": 0.0004241990473019541, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011737272616301198, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 943.2902221679688, "completions/mean_terminated_length": 542.7572021484375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 10.587755102040816, "grad_norm": 0.2606118321418762, "learning_rate": 1e-06, "loss": -0.0624, "num_tokens": 626017193.0, "reward": 0.6573660969734192, "reward_std": 0.14613012969493866, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 1133 }, { "clip_ratio/high_max": 0.0017134799454652239, "clip_ratio/high_mean": 0.0006363854117807932, "clip_ratio/low_mean": 0.0005908216662646737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012272070634935517, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2954.0, "completions/mean_length": 952.458740234375, "completions/mean_terminated_length": 535.1744995117188, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.597084548104956, "grad_norm": 0.26522207260131836, "learning_rate": 1e-06, "loss": -0.0445, "num_tokens": 626530348.0, "reward": 0.6316964626312256, "reward_std": 0.14245164394378662, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1134 }, { "clip_ratio/high_max": 0.002229847199487267, "clip_ratio/high_mean": 0.0007801658339303685, "clip_ratio/low_mean": 0.0003450735118804005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011252393596805632, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3440.0, "completions/mean_length": 926.1785888671875, "completions/mean_terminated_length": 509.93939208984375, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 10.606413994169095, "grad_norm": 0.4457687735557556, "learning_rate": 1e-06, "loss": -0.0384, "num_tokens": 627019428.0, "reward": 0.6785714626312256, "reward_std": 0.1283590942621231, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1135 }, { "clip_ratio/high_max": 0.0015379057549580466, "clip_ratio/high_mean": 0.0005165713519090787, "clip_ratio/low_mean": 0.00024278755722662027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007593589161842829, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3915.0, "completions/mean_length": 996.0379638671875, "completions/mean_terminated_length": 516.662353515625, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 10.615743440233237, "grad_norm": 0.22791990637779236, "learning_rate": 1e-06, "loss": -0.0556, "num_tokens": 627508086.0, "reward": 0.6662946939468384, "reward_std": 0.10994119197130203, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1136 }, { "clip_ratio/high_max": 0.001585584872373147, "clip_ratio/high_mean": 0.0004915774989058264, "clip_ratio/low_mean": 0.00035343909712537425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008450166023976635, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2700.0, "completions/mean_length": 968.2969360351562, "completions/mean_terminated_length": 503.15130615234375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 10.625072886297376, "grad_norm": 0.23939189314842224, "learning_rate": 1e-06, "loss": -0.0592, "num_tokens": 627997344.0, "reward": 0.6495535969734192, "reward_std": 0.12399844080209732, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1137 }, { "clip_ratio/high_max": 0.001792915580153931, "clip_ratio/high_mean": 0.0005436054998426698, "clip_ratio/low_mean": 0.0002602719532660558, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008038774612941779, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3635.0, "completions/mean_length": 912.0770263671875, "completions/mean_terminated_length": 516.5834350585938, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 10.634402332361516, "grad_norm": 0.2562861442565918, "learning_rate": 1e-06, "loss": -0.0208, "num_tokens": 628500997.0, "reward": 0.7008928656578064, "reward_std": 0.10750053822994232, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 1138 }, { "clip_ratio/high_max": 0.0023553940845886245, "clip_ratio/high_mean": 0.0008548100431653438, "clip_ratio/low_mean": 0.0003927026818928425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012475127550715115, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1756.0, "completions/mean_length": 839.8638916015625, "completions/mean_terminated_length": 507.4415588378906, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 10.643731778425655, "grad_norm": 0.3247152864933014, "learning_rate": 1e-06, "loss": -0.0415, "num_tokens": 628990683.0, "reward": 0.6875000596046448, "reward_std": 0.14902332425117493, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1139 }, { "clip_ratio/high_max": 0.0014639936889579985, "clip_ratio/high_mean": 0.0005834889207108063, "clip_ratio/low_mean": 0.0005985020097796223, "clip_ratio/low_min": 2.5578065105946735e-05, "clip_ratio/region_mean": 0.0011819909159385134, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 987.2277221679688, "completions/mean_terminated_length": 570.1012573242188, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 10.653061224489797, "grad_norm": 0.26748228073120117, "learning_rate": 1e-06, "loss": -0.022, "num_tokens": 629539639.0, "reward": 0.5837053656578064, "reward_std": 0.14725808799266815, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321892857551575, "step": 1140 }, { "clip_ratio/high_max": 0.00182073871474131, "clip_ratio/high_mean": 0.0006503117965621641, "clip_ratio/low_mean": 0.0003231200644222554, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009734318555274513, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2689.0, "completions/mean_length": 1005.7422485351562, "completions/mean_terminated_length": 546.1654052734375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 10.662390670553936, "grad_norm": 0.3622390329837799, "learning_rate": 1e-06, "loss": -0.0625, "num_tokens": 630049584.0, "reward": 0.6350446939468384, "reward_std": 0.14733223617076874, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1141 }, { "clip_ratio/high_max": 0.0017305209694313817, "clip_ratio/high_mean": 0.0006389486898115138, "clip_ratio/low_mean": 0.00033793572038121056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000976884417468682, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3463.0, "completions/mean_length": 956.1395263671875, "completions/mean_terminated_length": 548.3140258789062, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 10.671720116618076, "grad_norm": 0.20694097876548767, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 630578989.0, "reward": 0.707589328289032, "reward_std": 0.12249323725700378, "rewards/verify_math_reward/mean": 0.7075892686843872, "rewards/verify_math_reward/std": 0.45512402057647705, "step": 1142 }, { "clip_ratio/high_max": 0.0018354836392973084, "clip_ratio/high_mean": 0.0006199017298058607, "clip_ratio/low_mean": 0.0003048370072065154, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009247387133655138, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 907.5435791015625, "completions/mean_terminated_length": 493.40606689453125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.681049562682215, "grad_norm": 0.36741942167282104, "learning_rate": 1e-06, "loss": -0.0377, "num_tokens": 631069132.0, "reward": 0.6785714626312256, "reward_std": 0.12140624225139618, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1143 }, { "clip_ratio/high_max": 0.0018875606656365562, "clip_ratio/high_mean": 0.0007068097020237474, "clip_ratio/low_mean": 0.00036111112331127515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00106792084807239, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 892.716552734375, "completions/mean_terminated_length": 521.7260131835938, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.690379008746355, "grad_norm": 0.2807365655899048, "learning_rate": 1e-06, "loss": -0.0443, "num_tokens": 631577734.0, "reward": 0.6261160969734192, "reward_std": 0.13707111775875092, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1144 }, { "clip_ratio/high_max": 0.0013653917631017976, "clip_ratio/high_mean": 0.000463729195871565, "clip_ratio/low_mean": 0.00046903318366275926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009327623738499824, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2463.0, "completions/mean_length": 902.8672485351562, "completions/mean_terminated_length": 524.1560668945312, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.699708454810496, "grad_norm": 0.24377836287021637, "learning_rate": 1e-06, "loss": -0.034, "num_tokens": 632086639.0, "reward": 0.625, "reward_std": 0.13203826546669006, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1145 }, { "clip_ratio/high_max": 0.001926941087731393, "clip_ratio/high_mean": 0.0005894028254260775, "clip_ratio/low_mean": 0.0005284619023768755, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011178647091583116, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3093.0, "completions/mean_length": 774.7767944335938, "completions/mean_terminated_length": 523.591796875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 10.709037900874636, "grad_norm": 0.23393908143043518, "learning_rate": 1e-06, "loss": -0.0268, "num_tokens": 632604487.0, "reward": 0.6819196939468384, "reward_std": 0.12936869263648987, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1146 }, { "clip_ratio/high_max": 0.0017625708496780135, "clip_ratio/high_mean": 0.0006240743450689479, "clip_ratio/low_mean": 0.0003535435023422906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009776178321772022, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3627.0, "completions/mean_length": 903.35498046875, "completions/mean_terminated_length": 529.1546020507812, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.718367346938775, "grad_norm": 0.35867175459861755, "learning_rate": 1e-06, "loss": -0.0495, "num_tokens": 633118549.0, "reward": 0.6897321939468384, "reward_std": 0.11945345252752304, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1147 }, { "clip_ratio/high_max": 0.0016776008433225797, "clip_ratio/high_mean": 0.0005710745863325428, "clip_ratio/low_mean": 0.000358833816335391, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009299084103986388, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2322.0, "completions/mean_length": 1028.234375, "completions/mean_terminated_length": 521.5942993164062, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 10.727696793002915, "grad_norm": 0.2209133505821228, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 633605343.0, "reward": 0.640625, "reward_std": 0.1312875896692276, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1148 }, { "clip_ratio/high_max": 0.0016966796101769432, "clip_ratio/high_mean": 0.0006046265898476122, "clip_ratio/low_mean": 0.000288975000330538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008936015910876449, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3048.0, "completions/mean_length": 908.4063110351562, "completions/mean_terminated_length": 565.6118774414062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 10.737026239067056, "grad_norm": 0.2552914619445801, "learning_rate": 1e-06, "loss": -0.054, "num_tokens": 634148451.0, "reward": 0.6796875596046448, "reward_std": 0.13193020224571228, "rewards/verify_math_reward/mean": 0.6796875, "rewards/verify_math_reward/std": 0.4668572247028351, "step": 1149 }, { "clip_ratio/high_max": 0.0018121568609785754, "clip_ratio/high_mean": 0.0006204616511240602, "clip_ratio/low_mean": 0.00036168388578516897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000982145549642155, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3298.0, "completions/mean_length": 921.3381958007812, "completions/mean_terminated_length": 540.3787231445312, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 10.746355685131196, "grad_norm": 0.28461557626724243, "learning_rate": 1e-06, "loss": -0.0429, "num_tokens": 634672034.0, "reward": 0.6361607313156128, "reward_std": 0.15477433800697327, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1150 }, { "clip_ratio/high_max": 0.0015115136702661403, "clip_ratio/high_mean": 0.00047510640524706105, "clip_ratio/low_mean": 0.0003698664345392899, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008449728629784659, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3388.0, "completions/mean_length": 985.1295166015625, "completions/mean_terminated_length": 554.2719116210938, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 10.755685131195335, "grad_norm": 0.2832811176776886, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 635198766.0, "reward": 0.5814732313156128, "reward_std": 0.13583439588546753, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935930073261261, "step": 1151 }, { "clip_ratio/high_max": 0.0017652543720032554, "clip_ratio/high_mean": 0.0006981578808336053, "clip_ratio/low_mean": 0.00021140358376214863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009095614623220172, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2570.0, "completions/mean_length": 1023.5714721679688, "completions/mean_terminated_length": 530.072509765625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.765014577259475, "grad_norm": 0.2548813819885254, "learning_rate": 1e-06, "loss": -0.0865, "num_tokens": 635701902.0, "reward": 0.6417410969734192, "reward_std": 0.1361374855041504, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 1152 }, { "clip_ratio/high_max": 0.0015753203006170224, "clip_ratio/high_mean": 0.000633159390417859, "clip_ratio/low_mean": 0.0003180185126439028, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009511779135209508, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 973.185302734375, "completions/mean_terminated_length": 504.1617736816406, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 10.774344023323614, "grad_norm": 0.32098862528800964, "learning_rate": 1e-06, "loss": -0.078, "num_tokens": 636186060.0, "reward": 0.6785714626312256, "reward_std": 0.1409093141555786, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1153 }, { "clip_ratio/high_max": 0.0021992778820276726, "clip_ratio/high_mean": 0.0007671369257877814, "clip_ratio/low_mean": 0.0003616119411162799, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011287488487141673, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3448.0, "completions/mean_length": 890.9029541015625, "completions/mean_terminated_length": 528.5875854492188, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 10.783673469387756, "grad_norm": 0.22918209433555603, "learning_rate": 1e-06, "loss": -0.0489, "num_tokens": 636699973.0, "reward": 0.6718750596046448, "reward_std": 0.146052747964859, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1154 }, { "clip_ratio/high_max": 0.001795498130377382, "clip_ratio/high_mean": 0.0006084727829147596, "clip_ratio/low_mean": 0.00030649165887552954, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009149644356511999, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3645.0, "completions/mean_length": 924.3605346679688, "completions/mean_terminated_length": 548.198486328125, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 10.793002915451895, "grad_norm": 0.2120915800333023, "learning_rate": 1e-06, "loss": -0.0388, "num_tokens": 637226384.0, "reward": 0.6037946939468384, "reward_std": 0.12941217422485352, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1155 }, { "clip_ratio/high_max": 0.0018260637007188052, "clip_ratio/high_mean": 0.0006302491547103273, "clip_ratio/low_mean": 0.000405974820296251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010362239736423362, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3427.0, "completions/mean_length": 898.4107666015625, "completions/mean_terminated_length": 545.7645874023438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 10.802332361516035, "grad_norm": 0.2945021688938141, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 637757680.0, "reward": 0.6439732313156128, "reward_std": 0.1316271275281906, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1156 }, { "clip_ratio/high_max": 0.0019284265108581167, "clip_ratio/high_mean": 0.0007179477197496453, "clip_ratio/low_mean": 0.0004558523833111394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011738000976038165, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2167.0, "completions/mean_length": 926.93310546875, "completions/mean_terminated_length": 524.322021484375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 10.811661807580174, "grad_norm": 0.3774220049381256, "learning_rate": 1e-06, "loss": -0.0378, "num_tokens": 638273036.0, "reward": 0.6305803656578064, "reward_std": 0.14763645827770233, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1157 }, { "clip_ratio/high_max": 0.001877379123470746, "clip_ratio/high_mean": 0.0005780792616860708, "clip_ratio/low_mean": 0.00041741555151020293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000995494829112431, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2687.0, "completions/mean_length": 941.0045166015625, "completions/mean_terminated_length": 513.1406860351562, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 10.820991253644316, "grad_norm": 0.25385749340057373, "learning_rate": 1e-06, "loss": -0.0305, "num_tokens": 638767688.0, "reward": 0.6517857313156128, "reward_std": 0.12471451610326767, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667038440704346, "step": 1158 }, { "clip_ratio/high_max": 0.0018527543907111976, "clip_ratio/high_mean": 0.000647066875899327, "clip_ratio/low_mean": 0.00025293010685345507, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008999970013974234, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3651.0, "completions/mean_length": 799.7924194335938, "completions/mean_terminated_length": 489.892578125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.830320699708455, "grad_norm": 0.6219267845153809, "learning_rate": 1e-06, "loss": -0.0614, "num_tokens": 639254062.0, "reward": 0.7299107313156128, "reward_std": 0.11960569024085999, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425368309020996, "step": 1159 }, { "clip_ratio/high_max": 0.0020307375816628337, "clip_ratio/high_mean": 0.0007411050537484698, "clip_ratio/low_mean": 0.00042177719205938047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011628822358034085, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3510.0, "completions/mean_length": 996.0636596679688, "completions/mean_terminated_length": 535.0474243164062, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 10.839650145772595, "grad_norm": 0.2584342360496521, "learning_rate": 1e-06, "loss": -0.0712, "num_tokens": 639765207.0, "reward": 0.5948660969734192, "reward_std": 0.16175970435142517, "rewards/verify_math_reward/mean": 0.5948660969734192, "rewards/verify_math_reward/std": 0.49119213223457336, "step": 1160 }, { "clip_ratio/high_max": 0.0018753458425635472, "clip_ratio/high_mean": 0.0007418537570629269, "clip_ratio/low_mean": 0.0005814146361444728, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013232683886599261, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3906.0, "completions/mean_length": 835.4832763671875, "completions/mean_terminated_length": 515.8247680664062, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 10.848979591836734, "grad_norm": 0.31656092405319214, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 640268168.0, "reward": 0.6897321939468384, "reward_std": 0.16123723983764648, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1161 }, { "clip_ratio/high_max": 0.002378771241637878, "clip_ratio/high_mean": 0.0009034149879880715, "clip_ratio/low_mean": 0.0004387713147480099, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013421862604445778, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3957.0, "completions/mean_length": 907.7422485351562, "completions/mean_terminated_length": 547.3304443359375, "completions/min_length": 178.0, "completions/min_terminated_length": 178.0, "epoch": 10.858309037900874, "grad_norm": 1.2304679155349731, "learning_rate": 1e-06, "loss": -0.0561, "num_tokens": 640796393.0, "reward": 0.652901828289032, "reward_std": 0.1844968944787979, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1162 }, { "clip_ratio/high_max": 0.0016241311132034753, "clip_ratio/high_mean": 0.0005267533124424517, "clip_ratio/low_mean": 0.000373558068076818, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009003113955259323, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 840.7142944335938, "completions/mean_terminated_length": 521.5686645507812, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 10.867638483965015, "grad_norm": 0.3395094573497772, "learning_rate": 1e-06, "loss": -0.0268, "num_tokens": 641311673.0, "reward": 0.6852678656578064, "reward_std": 0.13256637752056122, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.4646684527397156, "step": 1163 }, { "clip_ratio/high_max": 0.0019190324965165928, "clip_ratio/high_mean": 0.0006439916669478407, "clip_ratio/low_mean": 0.00031507297444477445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009590646368451416, "completions/clipped_ratio": 0.1540178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3959.0, "completions/mean_length": 1078.12060546875, "completions/mean_terminated_length": 528.6912841796875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 10.876967930029155, "grad_norm": 0.2265859842300415, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 641794757.0, "reward": 0.5959821939468384, "reward_std": 0.1022394672036171, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.490975022315979, "step": 1164 }, { "clip_ratio/high_max": 0.002210411668784218, "clip_ratio/high_mean": 0.0008060793652475695, "clip_ratio/low_mean": 0.0003716223181982059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011777016770793125, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3364.0, "completions/mean_length": 972.458740234375, "completions/mean_terminated_length": 539.8462524414062, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 10.886297376093294, "grad_norm": 0.3146289885044098, "learning_rate": 1e-06, "loss": -0.0447, "num_tokens": 642304832.0, "reward": 0.6852678656578064, "reward_std": 0.14304757118225098, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1165 }, { "clip_ratio/high_max": 0.0018482201303413603, "clip_ratio/high_mean": 0.0006746981798642082, "clip_ratio/low_mean": 0.00021237814030428126, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008870763049344532, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3116.0, "completions/mean_length": 916.677490234375, "completions/mean_terminated_length": 535.1587524414062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 10.895626822157434, "grad_norm": 0.19967442750930786, "learning_rate": 1e-06, "loss": -0.057, "num_tokens": 642821431.0, "reward": 0.7176339626312256, "reward_std": 0.11817535012960434, "rewards/verify_math_reward/mean": 0.7176339030265808, "rewards/verify_math_reward/std": 0.4504019320011139, "step": 1166 }, { "clip_ratio/high_max": 0.0018805424515448976, "clip_ratio/high_mean": 0.000654468773973349, "clip_ratio/low_mean": 0.0002834320416695846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009379008206451545, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3328.0, "completions/mean_length": 910.1172485351562, "completions/mean_terminated_length": 541.1419677734375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.904956268221575, "grad_norm": 0.3868400752544403, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 643343816.0, "reward": 0.6830357313156128, "reward_std": 0.13218912482261658, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1167 }, { "clip_ratio/high_max": 0.0015009039525466505, "clip_ratio/high_mean": 0.0004454672989595565, "clip_ratio/low_mean": 0.00033791017085604835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000783377481639036, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 933.04248046875, "completions/mean_terminated_length": 462.65386962890625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 10.914285714285715, "grad_norm": 0.26711606979370117, "learning_rate": 1e-06, "loss": -0.0589, "num_tokens": 643796438.0, "reward": 0.7053571939468384, "reward_std": 0.09337730705738068, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 1168 }, { "clip_ratio/high_max": 0.0021313845718395896, "clip_ratio/high_mean": 0.0008946762845880585, "clip_ratio/low_mean": 0.00042373732230771566, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001318413622357184, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3807.0, "completions/mean_length": 1145.243408203125, "completions/mean_terminated_length": 598.806884765625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 10.923615160349854, "grad_norm": 0.26545679569244385, "learning_rate": 1e-06, "loss": -0.0718, "num_tokens": 644335232.0, "reward": 0.5803571939468384, "reward_std": 0.18103593587875366, "rewards/verify_math_reward/mean": 0.5803571343421936, "rewards/verify_math_reward/std": 0.4937761127948761, "step": 1169 }, { "clip_ratio/high_max": 0.002021483662247192, "clip_ratio/high_mean": 0.0008393947937292978, "clip_ratio/low_mean": 0.00041340081406815443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012527956278063357, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2759.0, "completions/mean_length": 851.1239013671875, "completions/mean_terminated_length": 502.1693420410156, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 10.932944606413994, "grad_norm": 0.27601340413093567, "learning_rate": 1e-06, "loss": -0.052, "num_tokens": 644832735.0, "reward": 0.6696428656578064, "reward_std": 0.15665017068386078, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1170 }, { "clip_ratio/high_max": 0.001975671486434294, "clip_ratio/high_mean": 0.0006402839853762998, "clip_ratio/low_mean": 0.00037960739700793056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001019891383293725, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3253.0, "completions/mean_length": 976.9364013671875, "completions/mean_terminated_length": 540.4262084960938, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 10.942274052478133, "grad_norm": 0.2558325529098511, "learning_rate": 1e-06, "loss": -0.0266, "num_tokens": 645353934.0, "reward": 0.5792410969734192, "reward_std": 0.12264476716518402, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1171 }, { "clip_ratio/high_max": 0.0014388494892045856, "clip_ratio/high_mean": 0.0005496395879163174, "clip_ratio/low_mean": 0.0002644868168317771, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000814126407931326, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 1029.477783203125, "completions/mean_terminated_length": 604.7623901367188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 10.951603498542275, "grad_norm": 0.24647484719753265, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 645920266.0, "reward": 0.590401828289032, "reward_std": 0.13016286492347717, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1172 }, { "clip_ratio/high_max": 0.0018436602185829543, "clip_ratio/high_mean": 0.0006677040037175175, "clip_ratio/low_mean": 0.000404253919441544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010719579295255244, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3685.0, "completions/mean_length": 1034.825927734375, "completions/mean_terminated_length": 628.475341796875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 10.960932944606414, "grad_norm": 0.22879140079021454, "learning_rate": 1e-06, "loss": -0.05, "num_tokens": 646498918.0, "reward": 0.6183035969734192, "reward_std": 0.1615314483642578, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1173 }, { "clip_ratio/high_max": 0.0020206565677654, "clip_ratio/high_mean": 0.0006967060598981334, "clip_ratio/low_mean": 0.000314914309001324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010116203666257206, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3733.0, "completions/mean_length": 929.1563110351562, "completions/mean_terminated_length": 579.90087890625, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 10.970262390670554, "grad_norm": 0.22495341300964355, "learning_rate": 1e-06, "loss": -0.0447, "num_tokens": 647052314.0, "reward": 0.6227678656578064, "reward_std": 0.11986783146858215, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 1174 }, { "clip_ratio/high_max": 0.0020932027109665796, "clip_ratio/high_mean": 0.0006908383847985533, "clip_ratio/low_mean": 0.0004698266920968308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011606650878093205, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 867.8939819335938, "completions/mean_terminated_length": 547.0637817382812, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 10.979591836734693, "grad_norm": 0.5894312858581543, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 647591171.0, "reward": 0.6886160969734192, "reward_std": 0.14011836051940918, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331799030303955, "step": 1175 }, { "clip_ratio/high_max": 0.0019288870716991369, "clip_ratio/high_mean": 0.0007526243389293086, "clip_ratio/low_mean": 0.00043504604309418937, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011876703883899609, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3127.0, "completions/mean_length": 978.0859985351562, "completions/mean_terminated_length": 514.3961791992188, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 10.988921282798835, "grad_norm": 0.3286615312099457, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 648082280.0, "reward": 0.6651785969734192, "reward_std": 0.1680738478899002, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 1176 }, { "clip_ratio/high_max": 0.0017645190782786813, "clip_ratio/high_mean": 0.0007083440468704794, "clip_ratio/low_mean": 0.0002699465910609433, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009782906345208175, "completions/clipped_ratio": 0.11079545454545459, "completions/max_length": 4096.0, "completions/max_terminated_length": 1755.0, "completions/mean_length": 975.0966186523438, "completions/mean_terminated_length": 586.2300415039062, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 10.998250728862974, "grad_norm": 0.228280171751976, "learning_rate": 1e-06, "loss": -0.0402, "num_tokens": 648615781.0, "reward": 0.6082589626312256, "reward_std": 0.11532102525234222, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 1177 }, { "clip_ratio/high_max": 0.0017149225714092609, "clip_ratio/high_mean": 0.0007062563181534642, "clip_ratio/low_mean": 0.0002461648487042112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009524211800453486, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3793.0, "completions/mean_length": 1036.0670166015625, "completions/mean_terminated_length": 567.4285888671875, "completions/min_length": 188.0, "completions/min_terminated_length": 188.0, "epoch": 11.00932944606414, "grad_norm": 0.25313082337379456, "learning_rate": 1e-06, "loss": -0.0859, "num_tokens": 649150369.0, "reward": 0.6395089626312256, "reward_std": 0.13485869765281677, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1178 }, { "clip_ratio/high_max": 0.0015608993235218804, "clip_ratio/high_mean": 0.0006532988390972605, "clip_ratio/low_mean": 0.00038358610345312627, "clip_ratio/low_min": 1.3867317647964228e-05, "clip_ratio/region_mean": 0.0010368849434598815, "completions/clipped_ratio": 0.1439732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 1100.68310546875, "completions/mean_terminated_length": 596.9074096679688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 11.018658892128279, "grad_norm": 0.22719907760620117, "learning_rate": 1e-06, "loss": -0.0463, "num_tokens": 649699669.0, "reward": 0.598214328289032, "reward_std": 0.14011907577514648, "rewards/verify_math_reward/mean": 0.5982142686843872, "rewards/verify_math_reward/std": 0.49053287506103516, "step": 1179 }, { "clip_ratio/high_max": 0.001578674309712369, "clip_ratio/high_mean": 0.0005197370301175397, "clip_ratio/low_mean": 0.00040229929800261743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009220363372151041, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3810.0, "completions/mean_length": 1035.2388916015625, "completions/mean_terminated_length": 511.1085205078125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 11.02798833819242, "grad_norm": 0.4742317795753479, "learning_rate": 1e-06, "loss": -0.0464, "num_tokens": 650172035.0, "reward": 0.6350446939468384, "reward_std": 0.12835979461669922, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.4816865026950836, "step": 1180 }, { "clip_ratio/high_max": 0.0018998483137693256, "clip_ratio/high_mean": 0.0007278305856743827, "clip_ratio/low_mean": 0.00037418897682073293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011020195488526952, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2734.0, "completions/mean_length": 978.14404296875, "completions/mean_terminated_length": 500.6344909667969, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 11.03731778425656, "grad_norm": 0.3449740409851074, "learning_rate": 1e-06, "loss": -0.0711, "num_tokens": 650647180.0, "reward": 0.6462053656578064, "reward_std": 0.15537208318710327, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1181 }, { "clip_ratio/high_max": 0.0015139637580432463, "clip_ratio/high_mean": 0.0005015873794036452, "clip_ratio/low_mean": 0.0003548100041825819, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008563973942727898, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3161.0, "completions/mean_length": 1060.3739013671875, "completions/mean_terminated_length": 545.1893310546875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 11.0466472303207, "grad_norm": 0.21541571617126465, "learning_rate": 1e-06, "loss": -0.0705, "num_tokens": 651151259.0, "reward": 0.637276828289032, "reward_std": 0.12651576101779938, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1182 }, { "clip_ratio/high_max": 0.001237130420122412, "clip_ratio/high_mean": 0.00043247520306977094, "clip_ratio/low_mean": 0.0003234145438000269, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007558897486887872, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3711.0, "completions/mean_length": 864.3471069335938, "completions/mean_terminated_length": 534.42431640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.055976676384839, "grad_norm": 4.377254962921143, "learning_rate": 1e-06, "loss": -0.0372, "num_tokens": 651675130.0, "reward": 0.6941964626312256, "reward_std": 0.12456366419792175, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.46100425720214844, "step": 1183 }, { "clip_ratio/high_max": 0.0019570005460991524, "clip_ratio/high_mean": 0.0006469885356636951, "clip_ratio/low_mean": 0.0004330397459852975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010800282616401091, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3125.0, "completions/mean_length": 976.755615234375, "completions/mean_terminated_length": 562.6965942382812, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 11.06530612244898, "grad_norm": 1.472005844116211, "learning_rate": 1e-06, "loss": -0.0461, "num_tokens": 652216159.0, "reward": 0.6328125, "reward_std": 0.13154789805412292, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1184 }, { "clip_ratio/high_max": 0.0019672623384394683, "clip_ratio/high_mean": 0.0007594211947434815, "clip_ratio/low_mean": 0.00042961725557688624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011890384485013783, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3493.0, "completions/mean_length": 1023.700927734375, "completions/mean_terminated_length": 553.1685791015625, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 11.07463556851312, "grad_norm": 0.4396667778491974, "learning_rate": 1e-06, "loss": -0.0512, "num_tokens": 652735523.0, "reward": 0.6049107313156128, "reward_std": 0.16266193985939026, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1185 }, { "clip_ratio/high_max": 0.0020886435340798926, "clip_ratio/high_mean": 0.0005994874472889933, "clip_ratio/low_mean": 0.0002933752721219207, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008928627175919246, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3389.0, "completions/mean_length": 1083.154052734375, "completions/mean_terminated_length": 553.3359375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 11.08396501457726, "grad_norm": 0.1828635334968567, "learning_rate": 1e-06, "loss": -0.056, "num_tokens": 653252093.0, "reward": 0.5915178656578064, "reward_std": 0.11907366663217545, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1186 }, { "clip_ratio/high_max": 0.0019462131022009999, "clip_ratio/high_mean": 0.0007399519618047634, "clip_ratio/low_mean": 0.00045357361727837997, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001193525571579812, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2473.0, "completions/mean_length": 1007.90185546875, "completions/mean_terminated_length": 507.23736572265625, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.093294460641399, "grad_norm": 0.2509366571903229, "learning_rate": 1e-06, "loss": -0.076, "num_tokens": 653736245.0, "reward": 0.6729910969734192, "reward_std": 0.14992554485797882, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1187 }, { "clip_ratio/high_max": 0.0019079922931268811, "clip_ratio/high_mean": 0.0008365356461581541, "clip_ratio/low_mean": 0.00040288263244292466, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012394182631396689, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3518.0, "completions/mean_length": 982.1350708007812, "completions/mean_terminated_length": 582.1171264648438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 11.102623906705539, "grad_norm": 1.9869697093963623, "learning_rate": 1e-06, "loss": -0.0591, "num_tokens": 654298454.0, "reward": 0.6328125, "reward_std": 0.17085081338882446, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1188 }, { "clip_ratio/high_max": 0.0020058422014699318, "clip_ratio/high_mean": 0.0007086117475409992, "clip_ratio/low_mean": 0.00030152343833833584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010101351435878314, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2463.0, "completions/mean_length": 929.2332763671875, "completions/mean_terminated_length": 486.0470886230469, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 11.11195335276968, "grad_norm": 0.2725389301776886, "learning_rate": 1e-06, "loss": -0.0576, "num_tokens": 654755831.0, "reward": 0.6863839626312256, "reward_std": 0.1317012757062912, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422144770622253, "step": 1189 }, { "clip_ratio/high_max": 0.0017150229396065697, "clip_ratio/high_mean": 0.0006754527330485871, "clip_ratio/low_mean": 0.0003505973147639452, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010260500803269679, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 1104.3304443359375, "completions/mean_terminated_length": 536.1912231445312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 11.12128279883382, "grad_norm": 0.3129972219467163, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 655254359.0, "reward": 0.637276828289032, "reward_std": 0.15169291198253632, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1190 }, { "clip_ratio/high_max": 0.0015929283217701595, "clip_ratio/high_mean": 0.000467680783913238, "clip_ratio/low_mean": 0.00022705319588567363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006947339988983003, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2747.0, "completions/mean_length": 1011.0692138671875, "completions/mean_terminated_length": 496.9140625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 11.130612244897959, "grad_norm": 0.3865568935871124, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 655713061.0, "reward": 0.6517857313156128, "reward_std": 0.0892007052898407, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1191 }, { "clip_ratio/high_max": 0.0014956098784750793, "clip_ratio/high_mean": 0.000487194334709784, "clip_ratio/low_mean": 0.00025403599784112885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007412303348246496, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3757.0, "completions/mean_length": 1117.485595703125, "completions/mean_terminated_length": 570.572021484375, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 11.139941690962099, "grad_norm": 0.31990551948547363, "learning_rate": 1e-06, "loss": -0.0382, "num_tokens": 656248016.0, "reward": 0.551339328289032, "reward_std": 0.1107742190361023, "rewards/verify_math_reward/mean": 0.5513392686843872, "rewards/verify_math_reward/std": 0.4976350665092468, "step": 1192 }, { "clip_ratio/high_max": 0.002139115455065621, "clip_ratio/high_mean": 0.0006330867236101767, "clip_ratio/low_mean": 0.0003242451542746494, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009573318893671967, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3669.0, "completions/mean_length": 900.2042846679688, "completions/mean_terminated_length": 556.52783203125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 11.14927113702624, "grad_norm": 0.2887069880962372, "learning_rate": 1e-06, "loss": -0.0654, "num_tokens": 656782271.0, "reward": 0.6819196939468384, "reward_std": 0.1191510558128357, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599096059799194, "step": 1193 }, { "clip_ratio/high_max": 0.0018818424177879933, "clip_ratio/high_mean": 0.0006496303103631362, "clip_ratio/low_mean": 0.00044450499535741983, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010941352920781355, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3941.0, "completions/mean_length": 999.5480346679688, "completions/mean_terminated_length": 534.4839477539062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 11.15860058309038, "grad_norm": 0.2673981785774231, "learning_rate": 1e-06, "loss": -0.0632, "num_tokens": 657283130.0, "reward": 0.6830357313156128, "reward_std": 0.13711389899253845, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1194 }, { "clip_ratio/high_max": 0.002027008975346689, "clip_ratio/high_mean": 0.0006321159180515679, "clip_ratio/low_mean": 0.0004025810528673901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001034696979331784, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3522.0, "completions/mean_length": 957.07373046875, "completions/mean_terminated_length": 513.2254638671875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 11.167930029154519, "grad_norm": 0.23403170704841614, "learning_rate": 1e-06, "loss": -0.071, "num_tokens": 657787980.0, "reward": 0.6551339626312256, "reward_std": 0.1152033880352974, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1195 }, { "clip_ratio/high_max": 0.0016585695921094157, "clip_ratio/high_mean": 0.0006414505151042249, "clip_ratio/low_mean": 0.0005228253835412033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011642758909147233, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3893.0, "completions/mean_length": 981.00341796875, "completions/mean_terminated_length": 554.0748901367188, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 11.177259475218658, "grad_norm": 0.4831053912639618, "learning_rate": 1e-06, "loss": -0.0412, "num_tokens": 658303335.0, "reward": 0.6584821939468384, "reward_std": 0.14977401494979858, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1196 }, { "clip_ratio/high_max": 0.002041688341705594, "clip_ratio/high_mean": 0.0007752884885121603, "clip_ratio/low_mean": 0.0003015225431681756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010768110296339728, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2148.0, "completions/mean_length": 844.7533569335938, "completions/mean_terminated_length": 481.7109069824219, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.186588921282798, "grad_norm": 0.2372497320175171, "learning_rate": 1e-06, "loss": -0.0476, "num_tokens": 658778250.0, "reward": 0.6729910969734192, "reward_std": 0.1347053349018097, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1197 }, { "clip_ratio/high_max": 0.0018739405095402617, "clip_ratio/high_mean": 0.0005323157165548764, "clip_ratio/low_mean": 0.00033523128422530135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008675469871377572, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 991.8795166015625, "completions/mean_terminated_length": 530.2410278320312, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 11.19591836734694, "grad_norm": 0.18299052119255066, "learning_rate": 1e-06, "loss": -0.0515, "num_tokens": 659276254.0, "reward": 0.6674107313156128, "reward_std": 0.10367163270711899, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140389680862427, "step": 1198 }, { "clip_ratio/high_max": 0.0013865770124539267, "clip_ratio/high_mean": 0.0004048795308335684, "clip_ratio/low_mean": 0.0002652573957675486, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006701369184156647, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2709.0, "completions/mean_length": 911.5803833007812, "completions/mean_terminated_length": 516.0250854492188, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 11.205247813411079, "grad_norm": 0.1874816119670868, "learning_rate": 1e-06, "loss": -0.0405, "num_tokens": 659783238.0, "reward": 0.684151828289032, "reward_std": 0.10107900947332382, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1199 }, { "clip_ratio/high_max": 0.0018549531014286913, "clip_ratio/high_mean": 0.0007177303068601759, "clip_ratio/low_mean": 0.0003987469649473496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00111647726589581, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2364.0, "completions/mean_length": 886.935302734375, "completions/mean_terminated_length": 533.0235595703125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.214577259475218, "grad_norm": 0.2875356376171112, "learning_rate": 1e-06, "loss": -0.0482, "num_tokens": 660308596.0, "reward": 0.715401828289032, "reward_std": 0.15646544098854065, "rewards/verify_math_reward/mean": 0.7154017686843872, "rewards/verify_math_reward/std": 0.4514748752117157, "step": 1200 }, { "clip_ratio/high_max": 0.0015939300137688406, "clip_ratio/high_mean": 0.0005759891691923258, "clip_ratio/low_mean": 0.0005601890934485709, "clip_ratio/low_min": 1.6933079677983187e-05, "clip_ratio/region_mean": 0.001136178234446561, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2480.0, "completions/mean_length": 877.9141235351562, "completions/mean_terminated_length": 549.3763427734375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.223906705539358, "grad_norm": 0.3419424295425415, "learning_rate": 1e-06, "loss": -0.028, "num_tokens": 660837935.0, "reward": 0.6395089626312256, "reward_std": 0.14905862510204315, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1201 }, { "clip_ratio/high_max": 0.0017622757513890974, "clip_ratio/high_mean": 0.0006267519165703561, "clip_ratio/low_mean": 0.0004762813796332921, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011030332898371853, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3329.0, "completions/mean_length": 898.3035888671875, "completions/mean_terminated_length": 563.1565551757812, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 11.2332361516035, "grad_norm": 0.19576263427734375, "learning_rate": 1e-06, "loss": -0.0551, "num_tokens": 661381743.0, "reward": 0.6953125596046448, "reward_std": 0.1301603466272354, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1202 }, { "clip_ratio/high_max": 0.002221898721472826, "clip_ratio/high_mean": 0.0008242839394370094, "clip_ratio/low_mean": 0.0003053017858292151, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011295857293589506, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 946.91748046875, "completions/mean_terminated_length": 506.20611572265625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.242565597667639, "grad_norm": 0.31083205342292786, "learning_rate": 1e-06, "loss": -0.0937, "num_tokens": 661866053.0, "reward": 0.691964328289032, "reward_std": 0.1568765938282013, "rewards/verify_math_reward/mean": 0.6919642686843872, "rewards/verify_math_reward/std": 0.4619392454624176, "step": 1203 }, { "clip_ratio/high_max": 0.0025405114611203317, "clip_ratio/high_mean": 0.0008033487756620161, "clip_ratio/low_mean": 0.0003372556275280658, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001140604394095135, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2117.0, "completions/mean_length": 896.91748046875, "completions/mean_terminated_length": 508.5431823730469, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.251895043731778, "grad_norm": 0.243963360786438, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 662365531.0, "reward": 0.7109375596046448, "reward_std": 0.1301603466272354, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 1204 }, { "clip_ratio/high_max": 0.0023471055210393388, "clip_ratio/high_mean": 0.000682915258948924, "clip_ratio/low_mean": 0.0002559357396876294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009388509988639271, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1902.0, "completions/mean_length": 941.4788208007812, "completions/mean_terminated_length": 545.18212890625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.261224489795918, "grad_norm": 0.21252906322479248, "learning_rate": 1e-06, "loss": -0.0584, "num_tokens": 662909056.0, "reward": 0.5837053656578064, "reward_std": 0.1277536004781723, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1205 }, { "clip_ratio/high_max": 0.0018565724603831768, "clip_ratio/high_mean": 0.0007533611615144764, "clip_ratio/low_mean": 0.00039845382980274735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001151814960394404, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3718.0, "completions/mean_length": 807.9185791015625, "completions/mean_terminated_length": 503.1695251464844, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 11.270553935860057, "grad_norm": 0.2475420981645584, "learning_rate": 1e-06, "loss": -0.0444, "num_tokens": 663412591.0, "reward": 0.676339328289032, "reward_std": 0.14905793964862823, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 1206 }, { "clip_ratio/high_max": 0.0022256549273151904, "clip_ratio/high_mean": 0.0008872234229784226, "clip_ratio/low_mean": 0.0003282145335106179, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012154379546700511, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2261.0, "completions/mean_length": 996.9074096679688, "completions/mean_terminated_length": 522.2715454101562, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 11.279883381924199, "grad_norm": 0.7276267409324646, "learning_rate": 1e-06, "loss": -0.0656, "num_tokens": 663904524.0, "reward": 0.6328125, "reward_std": 0.15413424372673035, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1207 }, { "clip_ratio/high_max": 0.0013603698025690392, "clip_ratio/high_mean": 0.0004850970781262731, "clip_ratio/low_mean": 0.00046215504994506773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009472521396673983, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2451.0, "completions/mean_length": 861.8348388671875, "completions/mean_terminated_length": 518.454345703125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 11.289212827988338, "grad_norm": 0.38679319620132446, "learning_rate": 1e-06, "loss": -0.0291, "num_tokens": 664417440.0, "reward": 0.637276828289032, "reward_std": 0.11329153180122375, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1208 }, { "clip_ratio/high_max": 0.001770718543411931, "clip_ratio/high_mean": 0.0006499004157376476, "clip_ratio/low_mean": 0.0002893336481974984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009392340580234304, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3344.0, "completions/mean_length": 799.7678833007812, "completions/mean_terminated_length": 476.60784912109375, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 11.298542274052478, "grad_norm": 0.2946408987045288, "learning_rate": 1e-06, "loss": -0.0565, "num_tokens": 664894040.0, "reward": 0.6662946939468384, "reward_std": 0.11028888076543808, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1209 }, { "clip_ratio/high_max": 0.0017693006157060154, "clip_ratio/high_mean": 0.0006510997354780557, "clip_ratio/low_mean": 0.0002872747079436522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009383744290971663, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3840.0, "completions/mean_length": 755.3292846679688, "completions/mean_terminated_length": 502.6734619140625, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 11.307871720116617, "grad_norm": 0.20046192407608032, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 665392023.0, "reward": 0.754464328289032, "reward_std": 0.10318448394536972, "rewards/verify_math_reward/mean": 0.7544642686843872, "rewards/verify_math_reward/std": 0.4306447505950928, "step": 1210 }, { "clip_ratio/high_max": 0.0023193726847239304, "clip_ratio/high_mean": 0.0007375519835477462, "clip_ratio/low_mean": 0.00037769984646729426, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011152518127346411, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2980.0, "completions/mean_length": 1107.86279296875, "completions/mean_terminated_length": 577.7726440429688, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 11.317201166180759, "grad_norm": 0.27446576952934265, "learning_rate": 1e-06, "loss": -0.0533, "num_tokens": 665931924.0, "reward": 0.5837053656578064, "reward_std": 0.13403385877609253, "rewards/verify_math_reward/mean": 0.5837053656578064, "rewards/verify_math_reward/std": 0.49321895837783813, "step": 1211 }, { "clip_ratio/high_max": 0.0021604953799396753, "clip_ratio/high_mean": 0.0007509531460527796, "clip_ratio/low_mean": 0.0003034861226751673, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010544392389419954, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 929.0960083007812, "completions/mean_terminated_length": 499.6172180175781, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.326530612244898, "grad_norm": 0.2643359899520874, "learning_rate": 1e-06, "loss": -0.0468, "num_tokens": 666412626.0, "reward": 0.6886160969734192, "reward_std": 0.13193020224571228, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1212 }, { "clip_ratio/high_max": 0.0015155598448473029, "clip_ratio/high_mean": 0.0005129636801939341, "clip_ratio/low_mean": 0.00033966854152822634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008526322199031711, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 1765.0, "completions/mean_length": 801.5859985351562, "completions/mean_terminated_length": 500.6346130371094, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 11.335860058309038, "grad_norm": 0.27162256836891174, "learning_rate": 1e-06, "loss": -0.0225, "num_tokens": 666900975.0, "reward": 0.7198660969734192, "reward_std": 0.11047180742025375, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931530952453613, "step": 1213 }, { "clip_ratio/high_max": 0.0015463138843188062, "clip_ratio/high_mean": 0.0005077983978480916, "clip_ratio/low_mean": 0.00028306973149483383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007908681291155517, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 850.4688110351562, "completions/mean_terminated_length": 505.8815002441406, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 11.345189504373177, "grad_norm": 0.182473286986351, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 667398955.0, "reward": 0.6127232313156128, "reward_std": 0.09709673374891281, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 1214 }, { "clip_ratio/high_max": 0.002268584561534226, "clip_ratio/high_mean": 0.0007521238931076368, "clip_ratio/low_mean": 0.0005650601628985896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013171840928407619, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 1998.0, "completions/mean_length": 1009.47216796875, "completions/mean_terminated_length": 568.53955078125, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 11.354518950437317, "grad_norm": 0.27947527170181274, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 667939146.0, "reward": 0.65625, "reward_std": 0.14466407895088196, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1215 }, { "clip_ratio/high_max": 0.001618959940969944, "clip_ratio/high_mean": 0.0006013408369653916, "clip_ratio/low_mean": 0.00033932765779809415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009406685057911091, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 947.9922485351562, "completions/mean_terminated_length": 570.2312622070312, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 11.363848396501458, "grad_norm": 0.23285862803459167, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 668477155.0, "reward": 0.6283482313156128, "reward_std": 0.13350322842597961, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159182548523, "step": 1216 }, { "clip_ratio/high_max": 0.0015124055862543173, "clip_ratio/high_mean": 0.0005476295182234026, "clip_ratio/low_mean": 0.00036976215733375284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009173916914733127, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3562.0, "completions/mean_length": 963.3270263671875, "completions/mean_terminated_length": 524.9122314453125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 11.373177842565598, "grad_norm": 0.26881372928619385, "learning_rate": 1e-06, "loss": -0.0432, "num_tokens": 668989984.0, "reward": 0.6830357313156128, "reward_std": 0.13016216456890106, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1217 }, { "clip_ratio/high_max": 0.0014185413492668886, "clip_ratio/high_mean": 0.0005190686224523233, "clip_ratio/low_mean": 0.00032771823771327036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008467868610750884, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3212.0, "completions/mean_length": 1001.3504638671875, "completions/mean_terminated_length": 541.1205444335938, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 11.382507288629737, "grad_norm": 0.27219751477241516, "learning_rate": 1e-06, "loss": -0.0392, "num_tokens": 669507074.0, "reward": 0.6595982313156128, "reward_std": 0.11855372786521912, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 1218 }, { "clip_ratio/high_max": 0.0015868601949478034, "clip_ratio/high_mean": 0.0005713184427804663, "clip_ratio/low_mean": 0.00044737579719367204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010186942463406012, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2285.0, "completions/mean_length": 871.8114013671875, "completions/mean_terminated_length": 511.7903137207031, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 11.391836734693877, "grad_norm": 0.29111766815185547, "learning_rate": 1e-06, "loss": -0.0404, "num_tokens": 670009601.0, "reward": 0.6439732313156128, "reward_std": 0.11663442850112915, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1219 }, { "clip_ratio/high_max": 0.001709231373752118, "clip_ratio/high_mean": 0.0006412777138393722, "clip_ratio/low_mean": 0.0003243132555326156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000965590948908357, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 907.5926513671875, "completions/mean_terminated_length": 524.9837646484375, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 11.401166180758018, "grad_norm": 0.21829597651958466, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 670517492.0, "reward": 0.6473214626312256, "reward_std": 0.13324150443077087, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1220 }, { "clip_ratio/high_max": 0.0018303882025065832, "clip_ratio/high_mean": 0.0007279628689502715, "clip_ratio/low_mean": 0.00040208381346928945, "clip_ratio/low_min": 3.4722223062999547e-05, "clip_ratio/region_mean": 0.0011300467122055124, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2121.0, "completions/mean_length": 1006.7991333007812, "completions/mean_terminated_length": 551.9231567382812, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 11.410495626822158, "grad_norm": 0.2785889804363251, "learning_rate": 1e-06, "loss": -0.043, "num_tokens": 671030944.0, "reward": 0.6183035969734192, "reward_std": 0.14613084495067596, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1221 }, { "clip_ratio/high_max": 0.0018251664550916757, "clip_ratio/high_mean": 0.0006509021695819683, "clip_ratio/low_mean": 0.0003536196527420543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010045218368759379, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2393.0, "completions/mean_length": 1002.72216796875, "completions/mean_terminated_length": 556.310302734375, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 11.419825072886297, "grad_norm": 0.2255057394504547, "learning_rate": 1e-06, "loss": -0.0926, "num_tokens": 671553927.0, "reward": 0.6618303656578064, "reward_std": 0.14451110363006592, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1222 }, { "clip_ratio/high_max": 0.0018701410735957325, "clip_ratio/high_mean": 0.0006567813634319464, "clip_ratio/low_mean": 0.0004231369225635717, "clip_ratio/low_min": 1.3898154065827839e-05, "clip_ratio/region_mean": 0.0010799182800838025, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2782.0, "completions/mean_length": 911.1629638671875, "completions/mean_terminated_length": 598.9240112304688, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 11.429154518950437, "grad_norm": 0.2082158774137497, "learning_rate": 1e-06, "loss": -0.015, "num_tokens": 672122753.0, "reward": 0.6417410969734192, "reward_std": 0.13200436532497406, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1223 }, { "clip_ratio/high_max": 0.0015425887959281681, "clip_ratio/high_mean": 0.00043351100589461566, "clip_ratio/low_mean": 0.00037760764553240733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008111186634778278, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3173.0, "completions/mean_length": 865.2266235351562, "completions/mean_terminated_length": 491.0522766113281, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.438483965014576, "grad_norm": 0.22037480771541595, "learning_rate": 1e-06, "loss": -0.0654, "num_tokens": 672601212.0, "reward": 0.6741071939468384, "reward_std": 0.11234975606203079, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692258834839, "step": 1224 }, { "clip_ratio/high_max": 0.0014426990892388858, "clip_ratio/high_mean": 0.0005319876636349363, "clip_ratio/low_mean": 0.00028383291578393255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008158205673680641, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2694.0, "completions/mean_length": 1066.0670166015625, "completions/mean_terminated_length": 574.8327026367188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 11.447813411078718, "grad_norm": 1.8511788845062256, "learning_rate": 1e-06, "loss": -0.0309, "num_tokens": 673136368.0, "reward": 0.6316964626312256, "reward_std": 0.12253418564796448, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1225 }, { "clip_ratio/high_max": 0.002710782464419026, "clip_ratio/high_mean": 0.0009395659435540438, "clip_ratio/low_mean": 0.0004538843431873829, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013934503076598048, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2999.0, "completions/mean_length": 969.044677734375, "completions/mean_terminated_length": 531.4300537109375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 11.457142857142857, "grad_norm": 1.7392948865890503, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 673630488.0, "reward": 0.6540178656578064, "reward_std": 0.15582531690597534, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1226 }, { "clip_ratio/high_max": 0.002148006526113022, "clip_ratio/high_mean": 0.0008222622636822052, "clip_ratio/low_mean": 0.00033328503150187316, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011555473138287198, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2454.0, "completions/mean_length": 887.09716796875, "completions/mean_terminated_length": 515.4557495117188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.466472303206997, "grad_norm": 0.2571759521961212, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 674126407.0, "reward": 0.6662946939468384, "reward_std": 0.14684367179870605, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1227 }, { "clip_ratio/high_max": 0.0019398934964556247, "clip_ratio/high_mean": 0.000737077415578824, "clip_ratio/low_mean": 0.0002981383008773264, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010352157023589825, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 1102.921875, "completions/mean_terminated_length": 525.0305786132812, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 11.475801749271136, "grad_norm": 0.2206743210554123, "learning_rate": 1e-06, "loss": -0.0705, "num_tokens": 674605305.0, "reward": 0.613839328289032, "reward_std": 0.14004239439964294, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1228 }, { "clip_ratio/high_max": 0.0017158561931864824, "clip_ratio/high_mean": 0.0005499132798831852, "clip_ratio/low_mean": 0.0003985656717304664, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009484789752605138, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2172.0, "completions/mean_length": 895.8482666015625, "completions/mean_terminated_length": 475.6262512207031, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 11.485131195335278, "grad_norm": 0.2786537706851959, "learning_rate": 1e-06, "loss": -0.0511, "num_tokens": 675054073.0, "reward": 0.6852678656578064, "reward_std": 0.13203828036785126, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1229 }, { "clip_ratio/high_max": 0.002075848875392694, "clip_ratio/high_mean": 0.0008127826840791386, "clip_ratio/low_mean": 0.00030058499078222667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011133676816825755, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3280.0, "completions/mean_length": 886.1685791015625, "completions/mean_terminated_length": 549.7496948242188, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 11.494460641399417, "grad_norm": 3.2610578536987305, "learning_rate": 1e-06, "loss": -0.0529, "num_tokens": 675591776.0, "reward": 0.6875000596046448, "reward_std": 0.1626594066619873, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1230 }, { "clip_ratio/high_max": 0.002299333555129124, "clip_ratio/high_mean": 0.000734691251636832, "clip_ratio/low_mean": 0.00036892001207888825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011036112609872362, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2260.0, "completions/mean_length": 958.3582763671875, "completions/mean_terminated_length": 523.7928466796875, "completions/min_length": 106.0, "completions/min_terminated_length": 106.0, "epoch": 11.503790087463557, "grad_norm": 0.5798492431640625, "learning_rate": 1e-06, "loss": -0.0616, "num_tokens": 676097265.0, "reward": 0.6462053656578064, "reward_std": 0.13019494712352753, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1231 }, { "clip_ratio/high_max": 0.0020301885961089283, "clip_ratio/high_mean": 0.0006928197844899842, "clip_ratio/low_mean": 0.0004166926501056878, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001109512410039315, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3057.0, "completions/mean_length": 863.380615234375, "completions/mean_terminated_length": 537.73583984375, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 11.513119533527696, "grad_norm": 0.3225386440753937, "learning_rate": 1e-06, "loss": -0.0313, "num_tokens": 676626278.0, "reward": 0.6395089626312256, "reward_std": 0.1191510558128357, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1232 }, { "clip_ratio/high_max": 0.002011083000979852, "clip_ratio/high_mean": 0.0007108403224265203, "clip_ratio/low_mean": 0.0002939858062518397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001004826135613257, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2558.0, "completions/mean_length": 935.97998046875, "completions/mean_terminated_length": 534.5182495117188, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 11.522448979591836, "grad_norm": 0.2137346714735031, "learning_rate": 1e-06, "loss": -0.0589, "num_tokens": 677133660.0, "reward": 0.6696428656578064, "reward_std": 0.14473934471607208, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1233 }, { "clip_ratio/high_max": 0.0016349138531950302, "clip_ratio/high_mean": 0.0005710250661650207, "clip_ratio/low_mean": 0.0003529160185280489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009239410883310484, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2994.0, "completions/mean_length": 926.4553833007812, "completions/mean_terminated_length": 554.9625854492188, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.531778425655977, "grad_norm": 0.1969604343175888, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 677670220.0, "reward": 0.6819196939468384, "reward_std": 0.12422666698694229, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1234 }, { "clip_ratio/high_max": 0.001964829079952324, "clip_ratio/high_mean": 0.000721224840162904, "clip_ratio/low_mean": 0.0002398229823938891, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009610478173271986, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3976.0, "completions/mean_length": 1073.9788818359375, "completions/mean_terminated_length": 570.30859375, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 11.541107871720117, "grad_norm": 0.22050292789936066, "learning_rate": 1e-06, "loss": -0.07, "num_tokens": 678196345.0, "reward": 0.6127232313156128, "reward_std": 0.13752757012844086, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 1235 }, { "clip_ratio/high_max": 0.0018077477980114054, "clip_ratio/high_mean": 0.0006138875123724574, "clip_ratio/low_mean": 0.0003746912752831122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009885787840175908, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3746.0, "completions/mean_length": 1013.685302734375, "completions/mean_terminated_length": 582.3180541992188, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 11.550437317784256, "grad_norm": 0.2914097011089325, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 678738279.0, "reward": 0.6573660969734192, "reward_std": 0.12317540496587753, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 1236 }, { "clip_ratio/high_max": 0.0020769256880157627, "clip_ratio/high_mean": 0.0007838482542865677, "clip_ratio/low_mean": 0.0006062004013074329, "clip_ratio/low_min": 1.75168170244433e-05, "clip_ratio/region_mean": 0.0013900486737838946, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3955.0, "completions/mean_length": 977.091552734375, "completions/mean_terminated_length": 545.1206665039062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.559766763848396, "grad_norm": 0.3013603985309601, "learning_rate": 1e-06, "loss": -0.0728, "num_tokens": 679272337.0, "reward": 0.6428571939468384, "reward_std": 0.156651571393013, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1237 }, { "clip_ratio/high_max": 0.0019671481240948197, "clip_ratio/high_mean": 0.0007069959083310096, "clip_ratio/low_mean": 0.00036611780433304375, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001073113686288707, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2136.0, "completions/mean_length": 891.2109985351562, "completions/mean_terminated_length": 542.1744995117188, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 11.569096209912537, "grad_norm": 0.3788949251174927, "learning_rate": 1e-06, "loss": -0.0621, "num_tokens": 679798614.0, "reward": 0.6875000596046448, "reward_std": 0.15507742762565613, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1238 }, { "clip_ratio/high_max": 0.0014797958465351257, "clip_ratio/high_mean": 0.000537880518095335, "clip_ratio/low_mean": 0.0003793309106185916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009172114514512941, "completions/clipped_ratio": 0.0736607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 793.6295166015625, "completions/mean_terminated_length": 531.0313110351562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 11.578425655976677, "grad_norm": 0.39991965889930725, "learning_rate": 1e-06, "loss": -0.0315, "num_tokens": 680326818.0, "reward": 0.6953125596046448, "reward_std": 0.12628935277462006, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1239 }, { "clip_ratio/high_max": 0.0015092982448550174, "clip_ratio/high_mean": 0.0005479157616719021, "clip_ratio/low_mean": 0.0001647114479510492, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007126272200821404, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2524.0, "completions/mean_length": 1020.1842041015625, "completions/mean_terminated_length": 512.2145385742188, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.587755102040816, "grad_norm": 0.27111905813217163, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 680815063.0, "reward": 0.6540178656578064, "reward_std": 0.0922047421336174, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1240 }, { "clip_ratio/high_max": 0.0017421358643332496, "clip_ratio/high_mean": 0.0006400034226317075, "clip_ratio/low_mean": 0.0006353565436256758, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012753599276038585, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2287.0, "completions/mean_length": 963.450927734375, "completions/mean_terminated_length": 525.053466796875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 11.597084548104956, "grad_norm": 0.29176339507102966, "learning_rate": 1e-06, "loss": -0.0386, "num_tokens": 681332923.0, "reward": 0.625, "reward_std": 0.13470715284347534, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1241 }, { "clip_ratio/high_max": 0.001956000065547414, "clip_ratio/high_mean": 0.0006792026015318697, "clip_ratio/low_mean": 0.00037941193295409903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010586145435809158, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 940.56591796875, "completions/mean_terminated_length": 548.6110229492188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 11.606413994169095, "grad_norm": 0.23168037831783295, "learning_rate": 1e-06, "loss": -0.0385, "num_tokens": 681872646.0, "reward": 0.59375, "reward_std": 0.13887983560562134, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1242 }, { "clip_ratio/high_max": 0.0012582398649101378, "clip_ratio/high_mean": 0.00035168908289051615, "clip_ratio/low_mean": 0.00027861314674737514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006303022291831439, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2416.0, "completions/mean_length": 991.114990234375, "completions/mean_terminated_length": 547.5599365234375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 11.615743440233237, "grad_norm": 0.40971654653549194, "learning_rate": 1e-06, "loss": -0.0246, "num_tokens": 682390493.0, "reward": 0.6305803656578064, "reward_std": 0.0925096645951271, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1243 }, { "clip_ratio/high_max": 0.0018434973389958031, "clip_ratio/high_mean": 0.0006250663341234031, "clip_ratio/low_mean": 0.0002022051241965528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008272714767372236, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3189.0, "completions/mean_length": 910.8951416015625, "completions/mean_terminated_length": 515.2546997070312, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.625072886297376, "grad_norm": 0.477720707654953, "learning_rate": 1e-06, "loss": -0.059, "num_tokens": 682889359.0, "reward": 0.738839328289032, "reward_std": 0.10911563783884048, "rewards/verify_math_reward/mean": 0.7388392686843872, "rewards/verify_math_reward/std": 0.439512699842453, "step": 1244 }, { "clip_ratio/high_max": 0.0018341023751418106, "clip_ratio/high_mean": 0.0005889217791263945, "clip_ratio/low_mean": 0.00040872691602089617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000997648705379106, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3530.0, "completions/mean_length": 984.3917846679688, "completions/mean_terminated_length": 557.9276733398438, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.634402332361516, "grad_norm": 0.3170127272605896, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 683416678.0, "reward": 0.6439732313156128, "reward_std": 0.1159629374742508, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909072041511536, "step": 1245 }, { "clip_ratio/high_max": 0.0016940337845881004, "clip_ratio/high_mean": 0.0006195336763994419, "clip_ratio/low_mean": 0.00040580420227342984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010253378804918611, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3234.0, "completions/mean_length": 1029.696533203125, "completions/mean_terminated_length": 550.9574584960938, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 11.643731778425655, "grad_norm": 0.32127144932746887, "learning_rate": 1e-06, "loss": -0.0694, "num_tokens": 683932014.0, "reward": 0.6205357313156128, "reward_std": 0.15270251035690308, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 1246 }, { "clip_ratio/high_max": 0.0016594558692304417, "clip_ratio/high_mean": 0.000595131909904012, "clip_ratio/low_mean": 0.0003704376595123904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009655695648689289, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3752.0, "completions/mean_length": 953.7344360351562, "completions/mean_terminated_length": 523.0685424804688, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.653061224489797, "grad_norm": 0.24881182610988617, "learning_rate": 1e-06, "loss": -0.0798, "num_tokens": 684433376.0, "reward": 0.6629464626312256, "reward_std": 0.1361381858587265, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1247 }, { "clip_ratio/high_max": 0.0023023031317279674, "clip_ratio/high_mean": 0.0010129041438631248, "clip_ratio/low_mean": 0.00036400539784153807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013769095603493042, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3806.0, "completions/mean_length": 1055.55810546875, "completions/mean_terminated_length": 511.4789733886719, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.662390670553936, "grad_norm": 0.2707832157611847, "learning_rate": 1e-06, "loss": -0.0906, "num_tokens": 684902212.0, "reward": 0.65625, "reward_std": 0.16604438424110413, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1248 }, { "clip_ratio/high_max": 0.0018992138502653688, "clip_ratio/high_mean": 0.0005970289803371998, "clip_ratio/low_mean": 0.0004722713022147218, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010693002732296009, "completions/clipped_ratio": 0.1529017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3183.0, "completions/mean_length": 1094.62841796875, "completions/mean_terminated_length": 552.8787841796875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.671720116618076, "grad_norm": 0.3415383994579315, "learning_rate": 1e-06, "loss": -0.0579, "num_tokens": 685406431.0, "reward": 0.6328125, "reward_std": 0.15638872981071472, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1249 }, { "clip_ratio/high_max": 0.0018159186165576102, "clip_ratio/high_mean": 0.0006874466489534825, "clip_ratio/low_mean": 0.0004209075045764621, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011083541467087343, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3336.0, "completions/mean_length": 974.4420166015625, "completions/mean_terminated_length": 551.11279296875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 11.681049562682215, "grad_norm": 0.5837641954421997, "learning_rate": 1e-06, "loss": -0.0623, "num_tokens": 685932931.0, "reward": 0.6149553656578064, "reward_std": 0.14091001451015472, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1250 }, { "clip_ratio/high_max": 0.0017719335919537116, "clip_ratio/high_mean": 0.0006121484057075577, "clip_ratio/low_mean": 0.0004516656276791764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010638140465744073, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2529.0, "completions/mean_length": 981.82373046875, "completions/mean_terminated_length": 577.3341674804688, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 11.690379008746355, "grad_norm": 33.79458236694336, "learning_rate": 1e-06, "loss": -0.028, "num_tokens": 686482749.0, "reward": 0.5892857313156128, "reward_std": 0.15112948417663574, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1251 }, { "clip_ratio/high_max": 0.001680741032032529, "clip_ratio/high_mean": 0.0006303282416411093, "clip_ratio/low_mean": 0.0002803527818286966, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009106810484809102, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3023.0, "completions/mean_length": 853.2388916015625, "completions/mean_terminated_length": 513.36865234375, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.699708454810496, "grad_norm": 0.3022686243057251, "learning_rate": 1e-06, "loss": -0.0653, "num_tokens": 686995531.0, "reward": 0.723214328289032, "reward_std": 0.12952165305614471, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1252 }, { "clip_ratio/high_max": 0.0016854898094607051, "clip_ratio/high_mean": 0.0006491483527497621, "clip_ratio/low_mean": 0.0002343011179846144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008834494910843205, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3098.0, "completions/mean_length": 949.6585083007812, "completions/mean_terminated_length": 563.2656860351562, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 11.709037900874636, "grad_norm": 0.17894645035266876, "learning_rate": 1e-06, "loss": -0.0739, "num_tokens": 687531945.0, "reward": 0.6908482313156128, "reward_std": 0.13361947238445282, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1253 }, { "clip_ratio/high_max": 0.0015656596842745785, "clip_ratio/high_mean": 0.0005110302818138734, "clip_ratio/low_mean": 0.0003575194359655143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008685497214173665, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3553.0, "completions/mean_length": 1021.6339721679688, "completions/mean_terminated_length": 541.6361694335938, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 11.718367346938775, "grad_norm": 1.2602882385253906, "learning_rate": 1e-06, "loss": -0.0486, "num_tokens": 688041305.0, "reward": 0.6037946939468384, "reward_std": 0.11614587157964706, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1254 }, { "clip_ratio/high_max": 0.0017286953152506612, "clip_ratio/high_mean": 0.0005943849428149406, "clip_ratio/low_mean": 0.0003327131453261245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009270980845030863, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 1068.630615234375, "completions/mean_terminated_length": 564.0690307617188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 11.727696793002915, "grad_norm": 0.2530302107334137, "learning_rate": 1e-06, "loss": -0.0692, "num_tokens": 688569622.0, "reward": 0.6261160969734192, "reward_std": 0.14142926037311554, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1255 }, { "clip_ratio/high_max": 0.001845830185629893, "clip_ratio/high_mean": 0.0006954295513423858, "clip_ratio/low_mean": 0.00024846552241797326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009438950601179386, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2203.0, "completions/mean_length": 979.6160888671875, "completions/mean_terminated_length": 525.3094482421875, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 11.737026239067056, "grad_norm": 0.45033928751945496, "learning_rate": 1e-06, "loss": -0.0707, "num_tokens": 689072446.0, "reward": 0.6462053656578064, "reward_std": 0.12474842369556427, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1256 }, { "clip_ratio/high_max": 0.0014097109124122653, "clip_ratio/high_mean": 0.000530242428794736, "clip_ratio/low_mean": 0.00046636186380055733, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009966042889573146, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2693.0, "completions/mean_length": 971.5513916015625, "completions/mean_terminated_length": 579.0326538085938, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.746355685131196, "grad_norm": 0.343485027551651, "learning_rate": 1e-06, "loss": -0.0321, "num_tokens": 689624204.0, "reward": 0.6272321939468384, "reward_std": 0.13159321248531342, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1257 }, { "clip_ratio/high_max": 0.001955183852260234, "clip_ratio/high_mean": 0.0006714449082210194, "clip_ratio/low_mean": 0.00032876396107894834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010002088820328936, "completions/clipped_ratio": 0.1595982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3088.0, "completions/mean_length": 1104.8984375, "completions/mean_terminated_length": 536.8671875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.755685131195335, "grad_norm": 0.2291107475757599, "learning_rate": 1e-06, "loss": -0.0566, "num_tokens": 690123401.0, "reward": 0.5959821939468384, "reward_std": 0.12407512217760086, "rewards/verify_math_reward/mean": 0.5959821343421936, "rewards/verify_math_reward/std": 0.4909749925136566, "step": 1258 }, { "clip_ratio/high_max": 0.002204755186539842, "clip_ratio/high_mean": 0.0006613614950765623, "clip_ratio/low_mean": 0.000552472462914011, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012138339479861315, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 982.8013916015625, "completions/mean_terminated_length": 542.591064453125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 11.765014577259475, "grad_norm": 0.7464296221733093, "learning_rate": 1e-06, "loss": -0.0561, "num_tokens": 690640767.0, "reward": 0.6428571939468384, "reward_std": 0.15244035422801971, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1259 }, { "clip_ratio/high_max": 0.0015923376358841779, "clip_ratio/high_mean": 0.0005037416613049572, "clip_ratio/low_mean": 0.00041081587005464826, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009145575368165737, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3388.0, "completions/mean_length": 765.8739013671875, "completions/mean_terminated_length": 514.0155639648438, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 11.774344023323614, "grad_norm": 0.33046114444732666, "learning_rate": 1e-06, "loss": -0.0238, "num_tokens": 691153182.0, "reward": 0.7031250596046448, "reward_std": 0.1176094189286232, "rewards/verify_math_reward/mean": 0.703125, "rewards/verify_math_reward/std": 0.4571361541748047, "step": 1260 }, { "clip_ratio/high_max": 0.0019095545285381377, "clip_ratio/high_mean": 0.000722666032743291, "clip_ratio/low_mean": 0.0004577948702717549, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00118046090574353, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2437.0, "completions/mean_length": 763.935302734375, "completions/mean_terminated_length": 494.6369323730469, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 11.783673469387756, "grad_norm": 0.48128291964530945, "learning_rate": 1e-06, "loss": -0.0271, "num_tokens": 691646292.0, "reward": 0.7109375596046448, "reward_std": 0.15439637005329132, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 1261 }, { "clip_ratio/high_max": 0.0018459937164152507, "clip_ratio/high_mean": 0.0006099835327404435, "clip_ratio/low_mean": 0.00023658017335037584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008465637074550614, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2382.0, "completions/mean_length": 926.06591796875, "completions/mean_terminated_length": 505.27813720703125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.793002915451895, "grad_norm": 0.2681536078453064, "learning_rate": 1e-06, "loss": -0.0702, "num_tokens": 692132271.0, "reward": 0.699776828289032, "reward_std": 0.13233955204486847, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586108922958374, "step": 1262 }, { "clip_ratio/high_max": 0.0020512671435426455, "clip_ratio/high_mean": 0.0007380642273346893, "clip_ratio/low_mean": 0.0005563448394241277, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012944090449309442, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 1052.86279296875, "completions/mean_terminated_length": 577.7406616210938, "completions/min_length": 172.0, "completions/min_terminated_length": 172.0, "epoch": 11.802332361516035, "grad_norm": 0.269562691450119, "learning_rate": 1e-06, "loss": -0.0302, "num_tokens": 692665740.0, "reward": 0.6183035969734192, "reward_std": 0.15356972813606262, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1263 }, { "clip_ratio/high_max": 0.0021905639296164736, "clip_ratio/high_mean": 0.0007840562648198102, "clip_ratio/low_mean": 0.00043938425301348616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012234405403432902, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3016.0, "completions/mean_length": 964.1864013671875, "completions/mean_terminated_length": 534.9530639648438, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.811661807580174, "grad_norm": 0.29455795884132385, "learning_rate": 1e-06, "loss": -0.0888, "num_tokens": 693174363.0, "reward": 0.6863839626312256, "reward_std": 0.15064051747322083, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 1264 }, { "clip_ratio/high_max": 0.0012735659529425902, "clip_ratio/high_mean": 0.0003974678108988883, "clip_ratio/low_mean": 0.0003051622916245833, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007026301009318558, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2422.0, "completions/mean_length": 1124.55029296875, "completions/mean_terminated_length": 536.6163330078125, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 11.820991253644316, "grad_norm": 0.2692880630493164, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 693664416.0, "reward": 0.6183035969734192, "reward_std": 0.10562442243099213, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1265 }, { "clip_ratio/high_max": 0.0020158496881776955, "clip_ratio/high_mean": 0.0006687959939881694, "clip_ratio/low_mean": 0.0004615484576788731, "clip_ratio/low_min": 1.9635563148767687e-05, "clip_ratio/region_mean": 0.001130344440753106, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3928.0, "completions/mean_length": 1009.591552734375, "completions/mean_terminated_length": 573.16943359375, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 11.830320699708455, "grad_norm": 0.2503564655780792, "learning_rate": 1e-06, "loss": -0.0615, "num_tokens": 694197394.0, "reward": 0.5892857313156128, "reward_std": 0.16093555092811584, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1266 }, { "clip_ratio/high_max": 0.0014823563906247728, "clip_ratio/high_mean": 0.0005556269861699548, "clip_ratio/low_mean": 0.0004243018215674965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009799288218346192, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2915.0, "completions/mean_length": 956.130615234375, "completions/mean_terminated_length": 548.3038940429688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 11.839650145772595, "grad_norm": 1.6916627883911133, "learning_rate": 1e-06, "loss": -0.0212, "num_tokens": 694724591.0, "reward": 0.6595982313156128, "reward_std": 0.12223179638385773, "rewards/verify_math_reward/mean": 0.6595982313156128, "rewards/verify_math_reward/std": 0.4741089344024658, "step": 1267 }, { "clip_ratio/high_max": 0.002956606214866042, "clip_ratio/high_mean": 0.0009120819522649981, "clip_ratio/low_mean": 0.0005626539095828775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014747358582098968, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2721.0, "completions/mean_length": 924.4553833007812, "completions/mean_terminated_length": 517.0277099609375, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 11.848979591836734, "grad_norm": 0.2784743905067444, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 695238159.0, "reward": 0.6361607313156128, "reward_std": 0.13914084434509277, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1268 }, { "clip_ratio/high_max": 0.0021453729314089287, "clip_ratio/high_mean": 0.0007798439692123793, "clip_ratio/low_mean": 0.00044218389143679815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001222027865878772, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2421.0, "completions/mean_length": 854.2511596679688, "completions/mean_terminated_length": 510.0654602050781, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 11.858309037900874, "grad_norm": 0.39456063508987427, "learning_rate": 1e-06, "loss": -0.0231, "num_tokens": 695739280.0, "reward": 0.637276828289032, "reward_std": 0.1486467719078064, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1269 }, { "clip_ratio/high_max": 0.001790165355487261, "clip_ratio/high_mean": 0.0006870819424875663, "clip_ratio/low_mean": 0.00032956657150862156, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010166485153604299, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3141.0, "completions/mean_length": 889.1663208007812, "completions/mean_terminated_length": 517.7645874023438, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 11.867638483965015, "grad_norm": 0.5777519941329956, "learning_rate": 1e-06, "loss": -0.0527, "num_tokens": 696246581.0, "reward": 0.6339285969734192, "reward_std": 0.13069167733192444, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1270 }, { "clip_ratio/high_max": 0.0021682983788195997, "clip_ratio/high_mean": 0.0007505781686631963, "clip_ratio/low_mean": 0.00047273177187889814, "clip_ratio/low_min": 1.694685488473624e-05, "clip_ratio/region_mean": 0.001223309966007946, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 896.607177734375, "completions/mean_terminated_length": 503.6992492675781, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 11.876967930029155, "grad_norm": 0.24833278357982635, "learning_rate": 1e-06, "loss": -0.0271, "num_tokens": 696744933.0, "reward": 0.6941964626312256, "reward_std": 0.12464035302400589, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1271 }, { "clip_ratio/high_max": 0.002244793126010336, "clip_ratio/high_mean": 0.0007893917318142485, "clip_ratio/low_mean": 0.00043416947755758883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012235612084623426, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3543.0, "completions/mean_length": 997.3404541015625, "completions/mean_terminated_length": 531.94482421875, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 11.886297376093294, "grad_norm": 0.33796581625938416, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 697254414.0, "reward": 0.621651828289032, "reward_std": 0.1480829268693924, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 1272 }, { "clip_ratio/high_max": 0.0017303563763562124, "clip_ratio/high_mean": 0.0005526193217519904, "clip_ratio/low_mean": 0.0003969038589275442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009495231497567147, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3646.0, "completions/mean_length": 1010.5313110351562, "completions/mean_terminated_length": 551.6666870117188, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 11.895626822157434, "grad_norm": 0.29500406980514526, "learning_rate": 1e-06, "loss": -0.0613, "num_tokens": 697773394.0, "reward": 0.6328125, "reward_std": 0.11547327041625977, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1273 }, { "clip_ratio/high_max": 0.001746648660628125, "clip_ratio/high_mean": 0.0005820595306431642, "clip_ratio/low_mean": 0.0003811074420809746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009631669599912129, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2596.0, "completions/mean_length": 974.8694458007812, "completions/mean_terminated_length": 565.0239868164062, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 11.904956268221575, "grad_norm": 0.25987085700035095, "learning_rate": 1e-06, "loss": -0.0483, "num_tokens": 698310221.0, "reward": 0.6026785969734192, "reward_std": 0.14627917110919952, "rewards/verify_math_reward/mean": 0.6026785969734192, "rewards/verify_math_reward/std": 0.48961687088012695, "step": 1274 }, { "clip_ratio/high_max": 0.001714388436084846, "clip_ratio/high_mean": 0.000591999774769647, "clip_ratio/low_mean": 0.0003437696518631128, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009357694325444754, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3182.0, "completions/mean_length": 1107.009033203125, "completions/mean_terminated_length": 590.5863647460938, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 11.914285714285715, "grad_norm": 0.227379709482193, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 698852605.0, "reward": 0.6328125, "reward_std": 0.13606080412864685, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1275 }, { "clip_ratio/high_max": 0.002196255649323575, "clip_ratio/high_mean": 0.0007929484127089381, "clip_ratio/low_mean": 0.0003965649657402537, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001189513372082729, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 918.0491333007812, "completions/mean_terminated_length": 527.7744140625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 11.923615160349854, "grad_norm": 0.2657451629638672, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 699365449.0, "reward": 0.6361607313156128, "reward_std": 0.1429387927055359, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1276 }, { "clip_ratio/high_max": 0.001362100110782194, "clip_ratio/high_mean": 0.0005375641376303975, "clip_ratio/low_mean": 0.00043177891700452165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009693430529296165, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3470.0, "completions/mean_length": 945.3717041015625, "completions/mean_terminated_length": 540.6309814453125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 11.932944606413994, "grad_norm": 0.22986739873886108, "learning_rate": 1e-06, "loss": -0.0734, "num_tokens": 699885182.0, "reward": 0.6540178656578064, "reward_std": 0.1504889577627182, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1277 }, { "clip_ratio/high_max": 0.0015836131751711946, "clip_ratio/high_mean": 0.000602021920713014, "clip_ratio/low_mean": 0.0004741516077046981, "clip_ratio/low_min": 3.965853011322906e-05, "clip_ratio/region_mean": 0.0010761735211417545, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2660.0, "completions/mean_length": 1021.411865234375, "completions/mean_terminated_length": 550.5289916992188, "completions/min_length": 169.0, "completions/min_terminated_length": 169.0, "epoch": 11.942274052478133, "grad_norm": 0.2524794042110443, "learning_rate": 1e-06, "loss": -0.0507, "num_tokens": 700396959.0, "reward": 0.6205357313156128, "reward_std": 0.15973204374313354, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 1278 }, { "clip_ratio/high_max": 0.0017062622755474877, "clip_ratio/high_mean": 0.0006627615730394609, "clip_ratio/low_mean": 0.00026853110784941236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009312926595157478, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 936.6395263671875, "completions/mean_terminated_length": 557.5162353515625, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 11.951603498542275, "grad_norm": 0.2874181270599365, "learning_rate": 1e-06, "loss": -0.0453, "num_tokens": 700924588.0, "reward": 0.6774553656578064, "reward_std": 0.125994011759758, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1279 }, { "clip_ratio/high_max": 0.002430144875688711, "clip_ratio/high_mean": 0.0007032658213574905, "clip_ratio/low_mean": 0.0003796259118189482, "clip_ratio/low_min": 2.031859548878856e-05, "clip_ratio/region_mean": 0.0010828917202161392, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 949.2489013671875, "completions/mean_terminated_length": 513.4218139648438, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 11.960932944606414, "grad_norm": 0.24304766952991486, "learning_rate": 1e-06, "loss": -0.052, "num_tokens": 701422435.0, "reward": 0.6975446939468384, "reward_std": 0.1239977478981018, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957791805267334, "step": 1280 }, { "clip_ratio/high_max": 0.0028999966161791235, "clip_ratio/high_mean": 0.0009244636557923513, "clip_ratio/low_mean": 0.0004252248681950732, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013496885294443928, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3781.0, "completions/mean_length": 1000.8460083007812, "completions/mean_terminated_length": 526.8134155273438, "completions/min_length": 168.0, "completions/min_terminated_length": 168.0, "epoch": 11.970262390670554, "grad_norm": 1.5346214771270752, "learning_rate": 1e-06, "loss": -0.0468, "num_tokens": 701923937.0, "reward": 0.6752232313156128, "reward_std": 0.14699704945087433, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1281 }, { "clip_ratio/high_max": 0.0022753454104531556, "clip_ratio/high_mean": 0.000832652862300165, "clip_ratio/low_mean": 0.00048645421611581696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013191070684115402, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3859.0, "completions/mean_length": 932.9777221679688, "completions/mean_terminated_length": 544.5363159179688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 11.979591836734693, "grad_norm": 0.28128111362457275, "learning_rate": 1e-06, "loss": -0.0438, "num_tokens": 702452005.0, "reward": 0.6395089626312256, "reward_std": 0.16251038014888763, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111123085022, "step": 1282 }, { "clip_ratio/high_max": 0.002333375727175735, "clip_ratio/high_mean": 0.0008855625001160661, "clip_ratio/low_mean": 0.00031522811423201347, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012007906225335319, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 975.5803833007812, "completions/mean_terminated_length": 574.7203979492188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 11.988921282798835, "grad_norm": 2.9312286376953125, "learning_rate": 1e-06, "loss": -0.0559, "num_tokens": 703001525.0, "reward": 0.613839328289032, "reward_std": 0.1521807461977005, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1283 }, { "clip_ratio/high_max": 0.0018744780209090095, "clip_ratio/high_mean": 0.000724603861272044, "clip_ratio/low_mean": 0.00047104938903430593, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001195653254399076, "completions/clipped_ratio": 0.12784090909090906, "completions/max_length": 4096.0, "completions/max_terminated_length": 2255.0, "completions/mean_length": 983.571044921875, "completions/mean_terminated_length": 527.3517456054688, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 11.998250728862974, "grad_norm": 0.23256705701351166, "learning_rate": 1e-06, "loss": -0.0778, "num_tokens": 703512836.0, "reward": 0.6953125596046448, "reward_std": 0.15146788954734802, "rewards/verify_math_reward/mean": 0.6953125, "rewards/verify_math_reward/std": 0.4605320394039154, "step": 1284 }, { "clip_ratio/high_max": 0.002448100822221022, "clip_ratio/high_mean": 0.0007052671644487418, "clip_ratio/low_mean": 0.00039358966751024127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010988568064931314, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2148.0, "completions/mean_length": 1018.099365234375, "completions/mean_terminated_length": 495.7402038574219, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 12.00932944606414, "grad_norm": 0.2784478962421417, "learning_rate": 1e-06, "loss": -0.0638, "num_tokens": 703980069.0, "reward": 0.6428571939468384, "reward_std": 0.13722126185894012, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1285 }, { "clip_ratio/high_max": 0.002386544922046596, "clip_ratio/high_mean": 0.0008257443287220667, "clip_ratio/low_mean": 0.00044577935909728694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012715236880467273, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2701.0, "completions/mean_length": 881.3248291015625, "completions/mean_terminated_length": 500.05865478515625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 12.018658892128279, "grad_norm": 0.24729658663272858, "learning_rate": 1e-06, "loss": -0.0523, "num_tokens": 704468696.0, "reward": 0.652901828289032, "reward_std": 0.12633031606674194, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1286 }, { "clip_ratio/high_max": 0.0015934536331769777, "clip_ratio/high_mean": 0.0006365084691424272, "clip_ratio/low_mean": 0.00045211950282464386, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001088627970602829, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3354.0, "completions/mean_length": 846.5892944335938, "completions/mean_terminated_length": 514.8536376953125, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 12.02798833819242, "grad_norm": 0.26070430874824524, "learning_rate": 1e-06, "loss": -0.0342, "num_tokens": 704978640.0, "reward": 0.7020089626312256, "reward_std": 0.14383850991725922, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1287 }, { "clip_ratio/high_max": 0.001844758658990031, "clip_ratio/high_mean": 0.0006172262137624784, "clip_ratio/low_mean": 0.0002482873160261079, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008655135297885863, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2534.0, "completions/mean_length": 855.1094360351562, "completions/mean_terminated_length": 506.58343505859375, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.03731778425656, "grad_norm": 0.5442487001419067, "learning_rate": 1e-06, "loss": -0.0242, "num_tokens": 705476786.0, "reward": 0.7299107313156128, "reward_std": 0.11930371820926666, "rewards/verify_math_reward/mean": 0.7299107313156128, "rewards/verify_math_reward/std": 0.44425371289253235, "step": 1288 }, { "clip_ratio/high_max": 0.002029318577115191, "clip_ratio/high_mean": 0.0007828341404092498, "clip_ratio/low_mean": 0.00040049768631433835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011833318276330829, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3228.0, "completions/mean_length": 845.677490234375, "completions/mean_terminated_length": 522.6392822265625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 12.0466472303207, "grad_norm": 23153.896484375, "learning_rate": 1e-06, "loss": 0.3239, "num_tokens": 705980673.0, "reward": 0.7433035969734192, "reward_std": 0.1471051424741745, "rewards/verify_math_reward/mean": 0.7433035969734192, "rewards/verify_math_reward/std": 0.43705442547798157, "step": 1289 }, { "clip_ratio/high_max": 0.002059018261206802, "clip_ratio/high_mean": 0.0006915239491718239, "clip_ratio/low_mean": 0.0004089122239747667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011004361895174952, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 989.8058471679688, "completions/mean_terminated_length": 504.8387145996094, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 12.055976676384839, "grad_norm": 0.7496042251586914, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 706458515.0, "reward": 0.6540178656578064, "reward_std": 0.14594538509845734, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1290 }, { "clip_ratio/high_max": 0.0014425257886614418, "clip_ratio/high_mean": 0.0004379914612400171, "clip_ratio/low_mean": 0.00033467197226855205, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007726634412392741, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3401.0, "completions/mean_length": 841.7210083007812, "completions/mean_terminated_length": 522.6740112304688, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 12.06530612244898, "grad_norm": 1.2562134265899658, "learning_rate": 1e-06, "loss": -0.0185, "num_tokens": 706975625.0, "reward": 0.6417410969734192, "reward_std": 0.11471623182296753, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1291 }, { "clip_ratio/high_max": 0.0021811319966218434, "clip_ratio/high_mean": 0.0007741660901956493, "clip_ratio/low_mean": 0.00045813859105692245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012323047012614552, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 1005.6864013671875, "completions/mean_terminated_length": 518.5827026367188, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 12.07463556851312, "grad_norm": 0.6910886764526367, "learning_rate": 1e-06, "loss": -0.06, "num_tokens": 707471544.0, "reward": 0.6383928656578064, "reward_std": 0.15977439284324646, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1292 }, { "clip_ratio/high_max": 0.0014795328643231187, "clip_ratio/high_mean": 0.000543700685739168, "clip_ratio/low_mean": 0.0005049879619036801, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010486886239959858, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3337.0, "completions/mean_length": 1017.1942138671875, "completions/mean_terminated_length": 568.36572265625, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 12.08396501457726, "grad_norm": 0.26466211676597595, "learning_rate": 1e-06, "loss": -0.0361, "num_tokens": 708006262.0, "reward": 0.6484375, "reward_std": 0.1283929944038391, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 1293 }, { "clip_ratio/high_max": 0.0018227593245683238, "clip_ratio/high_mean": 0.0005954590096735046, "clip_ratio/low_mean": 0.0006368893909893814, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012323484115768224, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2663.0, "completions/mean_length": 1029.587158203125, "completions/mean_terminated_length": 537.0543823242188, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.093294460641399, "grad_norm": 1.189442753791809, "learning_rate": 1e-06, "loss": -0.0521, "num_tokens": 708515316.0, "reward": 0.6350446939468384, "reward_std": 0.13827574253082275, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.4816865026950836, "step": 1294 }, { "clip_ratio/high_max": 0.0022230715621844865, "clip_ratio/high_mean": 0.0008959492315625539, "clip_ratio/low_mean": 0.00033243598750232195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012283852156542707, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3801.0, "completions/mean_length": 978.7422485351562, "completions/mean_terminated_length": 560.4772338867188, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 12.102623906705539, "grad_norm": 0.2985447347164154, "learning_rate": 1e-06, "loss": -0.0674, "num_tokens": 709050797.0, "reward": 0.652901828289032, "reward_std": 0.14263640344142914, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 1295 }, { "clip_ratio/high_max": 0.0015567751815979136, "clip_ratio/high_mean": 0.0005009084907214856, "clip_ratio/low_mean": 0.00029084627249176265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007917547663964797, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3497.0, "completions/mean_length": 861.9866333007812, "completions/mean_terminated_length": 523.0332641601562, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 12.11195335276968, "grad_norm": 2.3686461448669434, "learning_rate": 1e-06, "loss": -0.0186, "num_tokens": 709558057.0, "reward": 0.6651785969734192, "reward_std": 0.1011870950460434, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 1296 }, { "clip_ratio/high_max": 0.002217165849287994, "clip_ratio/high_mean": 0.0008089675575320143, "clip_ratio/low_mean": 0.00031538944858766627, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011243570315855322, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2325.0, "completions/mean_length": 886.3538208007812, "completions/mean_terminated_length": 545.5765380859375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 12.12128279883382, "grad_norm": 0.21736620366573334, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 710086078.0, "reward": 0.7087053656578064, "reward_std": 0.14376294612884521, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1297 }, { "clip_ratio/high_max": 0.0017939078898052685, "clip_ratio/high_mean": 0.0007152509715524502, "clip_ratio/low_mean": 0.0003571505853869894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010724015628511552, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3385.0, "completions/mean_length": 883.9263916015625, "completions/mean_terminated_length": 569.0171508789062, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.130612244897959, "grad_norm": 0.20817220211029053, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 710643932.0, "reward": 0.6328125, "reward_std": 0.12948885560035706, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1298 }, { "clip_ratio/high_max": 0.001732151722535491, "clip_ratio/high_mean": 0.0006494188701253734, "clip_ratio/low_mean": 0.0003305515740521514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009799704366741935, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2872.0, "completions/mean_length": 825.8717041015625, "completions/mean_terminated_length": 514.0501708984375, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.139941690962099, "grad_norm": 0.22387662529945374, "learning_rate": 1e-06, "loss": -0.0497, "num_tokens": 711149745.0, "reward": 0.7042410969734192, "reward_std": 0.12245932221412659, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1299 }, { "clip_ratio/high_max": 0.002410630557278637, "clip_ratio/high_mean": 0.0008133751998684602, "clip_ratio/low_mean": 0.00035388790547585813, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011672631117107812, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3713.0, "completions/mean_length": 1056.325927734375, "completions/mean_terminated_length": 577.2041625976562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.14927113702624, "grad_norm": 0.34515145421028137, "learning_rate": 1e-06, "loss": -0.0469, "num_tokens": 711696493.0, "reward": 0.640625, "reward_std": 0.16003261506557465, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1300 }, { "clip_ratio/high_max": 0.0015739901791675948, "clip_ratio/high_mean": 0.0005282271149553708, "clip_ratio/low_mean": 0.0003222537222882238, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008504808429279365, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2900.0, "completions/mean_length": 816.1295166015625, "completions/mean_terminated_length": 516.5067138671875, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 12.15860058309038, "grad_norm": 0.24931249022483826, "learning_rate": 1e-06, "loss": -0.0334, "num_tokens": 712205073.0, "reward": 0.7209821939468384, "reward_std": 0.11257727444171906, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1301 }, { "clip_ratio/high_max": 0.0016901142698770855, "clip_ratio/high_mean": 0.0005469306806844543, "clip_ratio/low_mean": 0.00030967633620093693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000856607028254075, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3696.0, "completions/mean_length": 916.333740234375, "completions/mean_terminated_length": 543.6546020507812, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 12.167930029154519, "grad_norm": 0.24502603709697723, "learning_rate": 1e-06, "loss": -0.0511, "num_tokens": 712731524.0, "reward": 0.6484375, "reward_std": 0.11858581006526947, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 1302 }, { "clip_ratio/high_max": 0.0014539215371769387, "clip_ratio/high_mean": 0.000498499149671261, "clip_ratio/low_mean": 0.0003480606339962833, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008465597966278438, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2060.0, "completions/mean_length": 913.7567138671875, "completions/mean_terminated_length": 500.4262390136719, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.177259475218658, "grad_norm": 0.22843466699123383, "learning_rate": 1e-06, "loss": -0.0434, "num_tokens": 713211074.0, "reward": 0.7198660969734192, "reward_std": 0.12110385298728943, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931530952453613, "step": 1303 }, { "clip_ratio/high_max": 0.0018457827500242274, "clip_ratio/high_mean": 0.0005627900591207435, "clip_ratio/low_mean": 0.00025844126093943487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008212313186959364, "completions/clipped_ratio": 0.0892857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 800.4721069335938, "completions/mean_terminated_length": 477.3811340332031, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 12.186588921282798, "grad_norm": 0.19199474155902863, "learning_rate": 1e-06, "loss": -0.0335, "num_tokens": 713680297.0, "reward": 0.699776828289032, "reward_std": 0.08837654441595078, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1304 }, { "clip_ratio/high_max": 0.0011734882300515892, "clip_ratio/high_mean": 0.0003831462172456668, "clip_ratio/low_mean": 0.00027823038726637606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006613765990550746, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2768.0, "completions/mean_length": 926.8370971679688, "completions/mean_terminated_length": 550.96875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 12.19591836734694, "grad_norm": 0.20187468826770782, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 714212319.0, "reward": 0.6584821939468384, "reward_std": 0.12253419309854507, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1305 }, { "clip_ratio/high_max": 0.0018866618229367305, "clip_ratio/high_mean": 0.0006295248540482135, "clip_ratio/low_mean": 0.00036837365564679203, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009978985372072202, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3804.0, "completions/mean_length": 909.1116333007812, "completions/mean_terminated_length": 526.6849975585938, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 12.205247813411079, "grad_norm": 0.2795391380786896, "learning_rate": 1e-06, "loss": -0.0475, "num_tokens": 714720035.0, "reward": 0.6774553656578064, "reward_std": 0.12102828174829483, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1306 }, { "clip_ratio/high_max": 0.0019569530631997623, "clip_ratio/high_mean": 0.000720999252735055, "clip_ratio/low_mean": 0.0003416577901589335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010626570328895468, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2499.0, "completions/mean_length": 959.7500610351562, "completions/mean_terminated_length": 529.9086303710938, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 12.214577259475218, "grad_norm": 0.2649560570716858, "learning_rate": 1e-06, "loss": -0.0626, "num_tokens": 715226251.0, "reward": 0.6741071939468384, "reward_std": 0.13970720767974854, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692258834839, "step": 1307 }, { "clip_ratio/high_max": 0.0020844952705374453, "clip_ratio/high_mean": 0.0007541122831753455, "clip_ratio/low_mean": 0.000370883740743011, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011249960371060297, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3845.0, "completions/mean_length": 1043.618408203125, "completions/mean_terminated_length": 567.0529174804688, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 12.223906705539358, "grad_norm": 0.2061583697795868, "learning_rate": 1e-06, "loss": -0.0744, "num_tokens": 715751605.0, "reward": 0.6272321939468384, "reward_std": 0.13639894127845764, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1308 }, { "clip_ratio/high_max": 0.0018267015984747559, "clip_ratio/high_mean": 0.0007149914308683947, "clip_ratio/low_mean": 0.00041626947313488927, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011312609058222733, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3141.0, "completions/mean_length": 1053.969970703125, "completions/mean_terminated_length": 551.5799560546875, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 12.2332361516035, "grad_norm": 8347.5732421875, "learning_rate": 1e-06, "loss": 0.4866, "num_tokens": 716262578.0, "reward": 0.6305803656578064, "reward_std": 0.14203909039497375, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1309 }, { "clip_ratio/high_max": 0.0015578833299514372, "clip_ratio/high_mean": 0.0006038658484612824, "clip_ratio/low_mean": 0.0004937596686431789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010976255343848607, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 1710.0, "completions/mean_length": 894.0625610351562, "completions/mean_terminated_length": 532.1043090820312, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.242565597667639, "grad_norm": 0.2176607847213745, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 716778194.0, "reward": 0.6629464626312256, "reward_std": 0.14748378098011017, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1310 }, { "clip_ratio/high_max": 0.0018361847760388628, "clip_ratio/high_mean": 0.0006725578768964624, "clip_ratio/low_mean": 0.00047711110664749867, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011496689621708356, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3422.0, "completions/mean_length": 915.4420166015625, "completions/mean_terminated_length": 511.3710632324219, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.251895043731778, "grad_norm": 0.23621632158756256, "learning_rate": 1e-06, "loss": -0.0928, "num_tokens": 717265078.0, "reward": 0.6852678656578064, "reward_std": 0.14868700504302979, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1311 }, { "clip_ratio/high_max": 0.0017448979124310426, "clip_ratio/high_mean": 0.0005409900240920251, "clip_ratio/low_mean": 0.0003150496285115878, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008560396508983104, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 1089.07373046875, "completions/mean_terminated_length": 532.2354125976562, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 12.261224489795918, "grad_norm": 0.26443490386009216, "learning_rate": 1e-06, "loss": -0.0521, "num_tokens": 717759792.0, "reward": 0.5703125, "reward_std": 0.13999709486961365, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1312 }, { "clip_ratio/high_max": 0.0018752369069261476, "clip_ratio/high_mean": 0.000779215579314041, "clip_ratio/low_mean": 0.00040045686000667047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011796724211308174, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2948.0, "completions/mean_length": 970.13623046875, "completions/mean_terminated_length": 568.5767822265625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 12.270553935860057, "grad_norm": 0.2585813105106354, "learning_rate": 1e-06, "loss": -0.0613, "num_tokens": 718296866.0, "reward": 0.6272321939468384, "reward_std": 0.1594713032245636, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1313 }, { "clip_ratio/high_max": 0.0014747667009942234, "clip_ratio/high_mean": 0.0005256458553049015, "clip_ratio/low_mean": 0.00038528843151652836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009109342699957779, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3269.0, "completions/mean_length": 828.2924194335938, "completions/mean_terminated_length": 534.1192016601562, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.279883381924199, "grad_norm": 0.2321818768978119, "learning_rate": 1e-06, "loss": -0.0275, "num_tokens": 718826944.0, "reward": 0.6819196939468384, "reward_std": 0.11825203895568848, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1314 }, { "clip_ratio/high_max": 0.0018210375856142491, "clip_ratio/high_mean": 0.0006717022824886953, "clip_ratio/low_mean": 0.0003300168152691185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010017190434155054, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 797.3504638671875, "completions/mean_terminated_length": 509.1189270019531, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.289212827988338, "grad_norm": 0.2518939971923828, "learning_rate": 1e-06, "loss": -0.0467, "num_tokens": 719326674.0, "reward": 0.7254464626312256, "reward_std": 0.11779487878084183, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1315 }, { "clip_ratio/high_max": 0.001613775129953865, "clip_ratio/high_mean": 0.0006450811197282746, "clip_ratio/low_mean": 0.0003908105545633589, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010358916770201176, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 1745.0, "completions/mean_length": 898.0670166015625, "completions/mean_terminated_length": 509.8323059082031, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 12.298542274052478, "grad_norm": 0.29463186860084534, "learning_rate": 1e-06, "loss": -0.0701, "num_tokens": 719826702.0, "reward": 0.6718750596046448, "reward_std": 0.14432775974273682, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1316 }, { "clip_ratio/high_max": 0.0014518742937070783, "clip_ratio/high_mean": 0.0005386547600210179, "clip_ratio/low_mean": 0.00026829864145838656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000806953390565468, "completions/clipped_ratio": 0.0792410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 846.0424194335938, "completions/mean_terminated_length": 566.34912109375, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 12.307871720116617, "grad_norm": 0.2016036957502365, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 720381084.0, "reward": 0.6741071939468384, "reward_std": 0.11114511638879776, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1317 }, { "clip_ratio/high_max": 0.0024602458797744475, "clip_ratio/high_mean": 0.000720085497960099, "clip_ratio/low_mean": 0.0005095433743917965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001229628876899369, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3203.0, "completions/mean_length": 1088.310302734375, "completions/mean_terminated_length": 564.0341186523438, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 12.317201166180759, "grad_norm": 0.2657475471496582, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 720910994.0, "reward": 0.5758928656578064, "reward_std": 0.14191961288452148, "rewards/verify_math_reward/mean": 0.5758928656578064, "rewards/verify_math_reward/std": 0.49448272585868835, "step": 1318 }, { "clip_ratio/high_max": 0.0019393142938497476, "clip_ratio/high_mean": 0.0006565232779394137, "clip_ratio/low_mean": 0.00029207376746853697, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009485970495006768, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3425.0, "completions/mean_length": 825.8694458007812, "completions/mean_terminated_length": 514.0477294921875, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 12.326530612244898, "grad_norm": 0.24735401570796967, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 721414773.0, "reward": 0.7187500596046448, "reward_std": 0.11899950355291367, "rewards/verify_math_reward/mean": 0.71875, "rewards/verify_math_reward/std": 0.4498603343963623, "step": 1319 }, { "clip_ratio/high_max": 0.0017042428044078406, "clip_ratio/high_mean": 0.0005626931406368385, "clip_ratio/low_mean": 0.0002088966343762877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007715897627349477, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3585.0, "completions/mean_length": 996.0156860351562, "completions/mean_terminated_length": 597.7808227539062, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 12.335860058309038, "grad_norm": 0.18399116396903992, "learning_rate": 1e-06, "loss": -0.0307, "num_tokens": 721975811.0, "reward": 0.6395089626312256, "reward_std": 0.10182830691337585, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1320 }, { "clip_ratio/high_max": 0.0016912338433030527, "clip_ratio/high_mean": 0.0006079029899410671, "clip_ratio/low_mean": 0.0005140973971720086, "clip_ratio/low_min": 1.20586537377676e-05, "clip_ratio/region_mean": 0.0011220004053029697, "completions/clipped_ratio": 0.1272321428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2808.0, "completions/mean_length": 1024.05810546875, "completions/mean_terminated_length": 576.2301635742188, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 12.345189504373177, "grad_norm": 0.21878719329833984, "learning_rate": 1e-06, "loss": -0.0403, "num_tokens": 722511047.0, "reward": 0.5792410969734192, "reward_std": 0.14286141097545624, "rewards/verify_math_reward/mean": 0.5792410969734192, "rewards/verify_math_reward/std": 0.49395665526390076, "step": 1321 }, { "clip_ratio/high_max": 0.0014700482706757612, "clip_ratio/high_mean": 0.0004403067352996004, "clip_ratio/low_mean": 0.00023282826759896125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000673134985845536, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3186.0, "completions/mean_length": 1039.094970703125, "completions/mean_terminated_length": 529.6107177734375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 12.354518950437317, "grad_norm": 0.21954764425754547, "learning_rate": 1e-06, "loss": -0.0477, "num_tokens": 723004596.0, "reward": 0.6305803656578064, "reward_std": 0.10682723671197891, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1322 }, { "clip_ratio/high_max": 0.0013260283303679898, "clip_ratio/high_mean": 0.00045830250110157067, "clip_ratio/low_mean": 0.00031886689885141095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007771694072289392, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 974.0145263671875, "completions/mean_terminated_length": 532.561767578125, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 12.363848396501458, "grad_norm": 0.3715142011642456, "learning_rate": 1e-06, "loss": -0.0439, "num_tokens": 723509937.0, "reward": 0.6328125, "reward_std": 0.11738258600234985, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1323 }, { "clip_ratio/high_max": 0.002065404762106482, "clip_ratio/high_mean": 0.0007164950420701643, "clip_ratio/low_mean": 0.0008087563928711461, "clip_ratio/low_min": 2.6260148842993658e-05, "clip_ratio/region_mean": 0.0015252514167514164, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3686.0, "completions/mean_length": 1027.399658203125, "completions/mean_terminated_length": 575.5570068359375, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.373177842565598, "grad_norm": 0.3467468321323395, "learning_rate": 1e-06, "loss": -0.0686, "num_tokens": 724055543.0, "reward": 0.5881696939468384, "reward_std": 0.1622903198003769, "rewards/verify_math_reward/mean": 0.5881696343421936, "rewards/verify_math_reward/std": 0.4924395978450775, "step": 1324 }, { "clip_ratio/high_max": 0.0014852873864583671, "clip_ratio/high_mean": 0.000510515309542825, "clip_ratio/low_mean": 0.00048510939222978777, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009956246976798866, "completions/clipped_ratio": 0.1651785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3804.0, "completions/mean_length": 1146.51123046875, "completions/mean_terminated_length": 562.9224853515625, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.382507288629737, "grad_norm": 0.30605512857437134, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 724562401.0, "reward": 0.5703125, "reward_std": 0.13305816054344177, "rewards/verify_math_reward/mean": 0.5703125, "rewards/verify_math_reward/std": 0.49530795216560364, "step": 1325 }, { "clip_ratio/high_max": 0.002095753065077588, "clip_ratio/high_mean": 0.0006541928869410185, "clip_ratio/low_mean": 0.00034604472557475674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010002375929616392, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3523.0, "completions/mean_length": 1100.7567138671875, "completions/mean_terminated_length": 624.1526489257812, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 12.391836734693877, "grad_norm": 0.2333359271287918, "learning_rate": 1e-06, "loss": -0.0588, "num_tokens": 725137831.0, "reward": 0.5424107313156128, "reward_std": 0.13873039186000824, "rewards/verify_math_reward/mean": 0.5424107313156128, "rewards/verify_math_reward/std": 0.4984763264656067, "step": 1326 }, { "clip_ratio/high_max": 0.0017548011946928455, "clip_ratio/high_mean": 0.0004978831566404551, "clip_ratio/low_mean": 0.00042375341763545293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000921636565180961, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4094.0, "completions/mean_length": 871.6908569335938, "completions/mean_terminated_length": 533.754638671875, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 12.401166180758018, "grad_norm": 0.3012625277042389, "learning_rate": 1e-06, "loss": -0.0407, "num_tokens": 725649330.0, "reward": 0.6383928656578064, "reward_std": 0.11325128376483917, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 1327 }, { "clip_ratio/high_max": 0.002174711273255525, "clip_ratio/high_mean": 0.0007291964629985159, "clip_ratio/low_mean": 0.00045984219013917027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011890386158484034, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3140.0, "completions/mean_length": 887.8605346679688, "completions/mean_terminated_length": 529.6315307617188, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.410495626822158, "grad_norm": 0.2796292304992676, "learning_rate": 1e-06, "loss": -0.0783, "num_tokens": 726165405.0, "reward": 0.6908482313156128, "reward_std": 0.17705437541007996, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1328 }, { "clip_ratio/high_max": 0.0018297717724635731, "clip_ratio/high_mean": 0.0006870367415103829, "clip_ratio/low_mean": 0.0003574270317585615, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010444637900945963, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2078.0, "completions/mean_length": 827.0870971679688, "completions/mean_terminated_length": 493.3603820800781, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.419825072886297, "grad_norm": 0.33825457096099854, "learning_rate": 1e-06, "loss": -0.046, "num_tokens": 726656691.0, "reward": 0.6852678656578064, "reward_std": 0.13139888644218445, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1329 }, { "clip_ratio/high_max": 0.0021442070792545564, "clip_ratio/high_mean": 0.0008315178347402252, "clip_ratio/low_mean": 0.00040918350123320124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012407013527990784, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1688.0, "completions/mean_length": 955.5279541015625, "completions/mean_terminated_length": 520.5704956054688, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 12.429154518950437, "grad_norm": 0.596783459186554, "learning_rate": 1e-06, "loss": -0.0944, "num_tokens": 727167484.0, "reward": 0.6171875, "reward_std": 0.1715322583913803, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1330 }, { "clip_ratio/high_max": 0.0018381874469923787, "clip_ratio/high_mean": 0.0007178178530011792, "clip_ratio/low_mean": 0.0003124983541056281, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010303161889169132, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3827.0, "completions/mean_length": 937.8460083007812, "completions/mean_terminated_length": 527.6444091796875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 12.438483965014576, "grad_norm": 0.3436470329761505, "learning_rate": 1e-06, "loss": -0.0832, "num_tokens": 727673458.0, "reward": 0.6785714626312256, "reward_std": 0.15834084153175354, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1331 }, { "clip_ratio/high_max": 0.002104193546983879, "clip_ratio/high_mean": 0.000805018111350364, "clip_ratio/low_mean": 0.00025845954769465607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001063477680872893, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1872.0, "completions/mean_length": 756.2154541015625, "completions/mean_terminated_length": 486.2931213378906, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.447813411078718, "grad_norm": 0.26644325256347656, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 728162619.0, "reward": 0.7477678656578064, "reward_std": 0.14382894337177277, "rewards/verify_math_reward/mean": 0.7477678656578064, "rewards/verify_math_reward/std": 0.434536337852478, "step": 1332 }, { "clip_ratio/high_max": 0.0019071974747930653, "clip_ratio/high_mean": 0.0005061422020844475, "clip_ratio/low_mean": 0.0006438554846681654, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011499976844788762, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2670.0, "completions/mean_length": 1034.37060546875, "completions/mean_terminated_length": 537.9973754882812, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 12.457142857142857, "grad_norm": 0.3891693949699402, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 728678415.0, "reward": 0.5892857313156128, "reward_std": 0.1322993040084839, "rewards/verify_math_reward/mean": 0.5892857313156128, "rewards/verify_math_reward/std": 0.49223825335502625, "step": 1333 }, { "clip_ratio/high_max": 0.0020731909971800633, "clip_ratio/high_mean": 0.0007368674869212555, "clip_ratio/low_mean": 0.00034514032768129255, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010820078059623484, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3718.0, "completions/mean_length": 1022.29248046875, "completions/mean_terminated_length": 523.9610595703125, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 12.466472303206997, "grad_norm": 0.4087883234024048, "learning_rate": 1e-06, "loss": -0.0774, "num_tokens": 729174453.0, "reward": 0.6283482313156128, "reward_std": 0.14969663321971893, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159182548523, "step": 1334 }, { "clip_ratio/high_max": 0.0013565474546339829, "clip_ratio/high_mean": 0.0005092681140013156, "clip_ratio/low_mean": 0.00016760302048623998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006768711373297265, "completions/clipped_ratio": 0.0625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3007.0, "completions/mean_length": 721.6138916015625, "completions/mean_terminated_length": 496.65478515625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 12.475801749271136, "grad_norm": 0.18541057407855988, "learning_rate": 1e-06, "loss": -0.0332, "num_tokens": 729672259.0, "reward": 0.723214328289032, "reward_std": 0.0914565846323967, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1335 }, { "clip_ratio/high_max": 0.0025216024623659905, "clip_ratio/high_mean": 0.000756388562876964, "clip_ratio/low_mean": 0.0003998155166300421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011562040745047852, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2925.0, "completions/mean_length": 919.2388916015625, "completions/mean_terminated_length": 524.6348876953125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 12.485131195335278, "grad_norm": 0.28288766741752625, "learning_rate": 1e-06, "loss": -0.0747, "num_tokens": 730185161.0, "reward": 0.6830357313156128, "reward_std": 0.14124813675880432, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1336 }, { "clip_ratio/high_max": 0.001978162796149263, "clip_ratio/high_mean": 0.000796153801275068, "clip_ratio/low_mean": 0.0003990620625700103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011952158638450783, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3891.0, "completions/mean_length": 880.2645263671875, "completions/mean_terminated_length": 530.035888671875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 12.494460641399417, "grad_norm": 0.272204726934433, "learning_rate": 1e-06, "loss": -0.0238, "num_tokens": 730705782.0, "reward": 0.6752232313156128, "reward_std": 0.13572561740875244, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1337 }, { "clip_ratio/high_max": 0.0017550437987665646, "clip_ratio/high_mean": 0.0006655100432908512, "clip_ratio/low_mean": 0.00032973168163152877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009952417258318746, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2684.0, "completions/mean_length": 893.2902221679688, "completions/mean_terminated_length": 540.079345703125, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 12.503790087463557, "grad_norm": 0.21157510578632355, "learning_rate": 1e-06, "loss": -0.0434, "num_tokens": 731231402.0, "reward": 0.6495535969734192, "reward_std": 0.13260099291801453, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1338 }, { "clip_ratio/high_max": 0.0019510428319335915, "clip_ratio/high_mean": 0.0007264954570018745, "clip_ratio/low_mean": 0.0005139458235134953, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012404412664182018, "completions/clipped_ratio": 0.1473214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2254.0, "completions/mean_length": 1069.1160888671875, "completions/mean_terminated_length": 546.1466064453125, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 12.513119533527696, "grad_norm": 0.2677256762981415, "learning_rate": 1e-06, "loss": -0.0351, "num_tokens": 731749626.0, "reward": 0.6283482313156128, "reward_std": 0.13196228444576263, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159480571747, "step": 1339 }, { "clip_ratio/high_max": 0.0014662169305665884, "clip_ratio/high_mean": 0.0006078013284422923, "clip_ratio/low_mean": 0.00047037160948093515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010781729142763652, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2787.0, "completions/mean_length": 921.1261596679688, "completions/mean_terminated_length": 570.9851684570312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 12.522448979591836, "grad_norm": 0.24825072288513184, "learning_rate": 1e-06, "loss": -0.0333, "num_tokens": 732300379.0, "reward": 0.6439732313156128, "reward_std": 0.16683784127235413, "rewards/verify_math_reward/mean": 0.6439732313156128, "rewards/verify_math_reward/std": 0.47909069061279297, "step": 1340 }, { "clip_ratio/high_max": 0.0018917612433142494, "clip_ratio/high_mean": 0.0005794450075882196, "clip_ratio/low_mean": 0.0005305540903464134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011099991024821065, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3103.0, "completions/mean_length": 906.1428833007812, "completions/mean_terminated_length": 571.8175048828125, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 12.531778425655977, "grad_norm": 0.20584636926651, "learning_rate": 1e-06, "loss": -0.0265, "num_tokens": 732847419.0, "reward": 0.6361607313156128, "reward_std": 0.12602904438972473, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1341 }, { "clip_ratio/high_max": 0.002032488802797161, "clip_ratio/high_mean": 0.0006238512614800129, "clip_ratio/low_mean": 0.0004995815661459346, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011234328158025164, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3197.0, "completions/mean_length": 921.6328735351562, "completions/mean_terminated_length": 522.8429565429688, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 12.541107871720117, "grad_norm": 0.25520849227905273, "learning_rate": 1e-06, "loss": -0.0395, "num_tokens": 733352234.0, "reward": 0.6395089626312256, "reward_std": 0.12471634149551392, "rewards/verify_math_reward/mean": 0.6395089030265808, "rewards/verify_math_reward/std": 0.4804111421108246, "step": 1342 }, { "clip_ratio/high_max": 0.002320948951819446, "clip_ratio/high_mean": 0.0008796048095973674, "clip_ratio/low_mean": 0.00044332246670819586, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013229272626631428, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3588.0, "completions/mean_length": 855.5100708007812, "completions/mean_terminated_length": 546.5147094726562, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 12.550437317784256, "grad_norm": 0.24174198508262634, "learning_rate": 1e-06, "loss": -0.0354, "num_tokens": 733881731.0, "reward": 0.6506696939468384, "reward_std": 0.15518662333488464, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1343 }, { "clip_ratio/high_max": 0.002332564181415364, "clip_ratio/high_mean": 0.0009191828648908995, "clip_ratio/low_mean": 0.00034532180598034756, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012645046808756888, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2618.0, "completions/mean_length": 878.654052734375, "completions/mean_terminated_length": 514.9540405273438, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.559766763848396, "grad_norm": 0.24639609456062317, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 734376733.0, "reward": 0.6830357313156128, "reward_std": 0.1587231457233429, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1344 }, { "clip_ratio/high_max": 0.001727164286421612, "clip_ratio/high_mean": 0.0007320740969589679, "clip_ratio/low_mean": 0.0004977139587936108, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012297880348342005, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 929.91748046875, "completions/mean_terminated_length": 558.8304443359375, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 12.569096209912537, "grad_norm": 0.24643969535827637, "learning_rate": 1e-06, "loss": -0.0601, "num_tokens": 734918323.0, "reward": 0.6283482313156128, "reward_std": 0.16506938636302948, "rewards/verify_math_reward/mean": 0.6283482313156128, "rewards/verify_math_reward/std": 0.4835159182548523, "step": 1345 }, { "clip_ratio/high_max": 0.0017780049893190153, "clip_ratio/high_mean": 0.0006547151942868368, "clip_ratio/low_mean": 0.00029960907022541505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009543242740619462, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2644.0, "completions/mean_length": 780.7176513671875, "completions/mean_terminated_length": 512.775634765625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 12.578425655976677, "grad_norm": 0.27910077571868896, "learning_rate": 1e-06, "loss": -0.0544, "num_tokens": 735437726.0, "reward": 0.7042410969734192, "reward_std": 0.12561674416065216, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1346 }, { "clip_ratio/high_max": 0.0017548783544043545, "clip_ratio/high_mean": 0.0005606087624983047, "clip_ratio/low_mean": 0.00019346550914178806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007540742844867054, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3733.0, "completions/mean_length": 948.0379638671875, "completions/mean_terminated_length": 498.3290710449219, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 12.587755102040816, "grad_norm": 0.6453022360801697, "learning_rate": 1e-06, "loss": -0.0632, "num_tokens": 735918744.0, "reward": 0.6752232313156128, "reward_std": 0.11622144281864166, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1347 }, { "clip_ratio/high_max": 0.0020438689753063954, "clip_ratio/high_mean": 0.000658259799820371, "clip_ratio/low_mean": 0.0003055958482036658, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009638556457503, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3782.0, "completions/mean_length": 1000.98779296875, "completions/mean_terminated_length": 517.7664794921875, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.597084548104956, "grad_norm": 0.3222213685512543, "learning_rate": 1e-06, "loss": -0.0732, "num_tokens": 736407973.0, "reward": 0.676339328289032, "reward_std": 0.13722378015518188, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 1348 }, { "clip_ratio/high_max": 0.0024825293221510947, "clip_ratio/high_mean": 0.0007444149105140241, "clip_ratio/low_mean": 0.0002747075670868071, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010191224682785105, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2491.0, "completions/mean_length": 1003.98779296875, "completions/mean_terminated_length": 530.436279296875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 12.606413994169095, "grad_norm": 0.24971255660057068, "learning_rate": 1e-06, "loss": -0.0567, "num_tokens": 736918250.0, "reward": 0.6261160969734192, "reward_std": 0.12628935277462006, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1349 }, { "clip_ratio/high_max": 0.0024342500692000613, "clip_ratio/high_mean": 0.0009041311186592793, "clip_ratio/low_mean": 0.0003971068072132766, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013012379131396301, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3984.0, "completions/mean_length": 888.65185546875, "completions/mean_terminated_length": 485.7185974121094, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 12.615743440233237, "grad_norm": 0.2990378439426422, "learning_rate": 1e-06, "loss": -0.0409, "num_tokens": 737398250.0, "reward": 0.6629464626312256, "reward_std": 0.16299711167812347, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1350 }, { "clip_ratio/high_max": 0.0023502210242440924, "clip_ratio/high_mean": 0.0007898391268099658, "clip_ratio/low_mean": 0.0006947171011688624, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014845562145637814, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 920.7254638671875, "completions/mean_terminated_length": 530.7794189453125, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 12.625072886297376, "grad_norm": 16.867530822753906, "learning_rate": 1e-06, "loss": -0.0679, "num_tokens": 737911308.0, "reward": 0.640625, "reward_std": 0.15643151104450226, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1351 }, { "clip_ratio/high_max": 0.002359927231736947, "clip_ratio/high_mean": 0.0006368118611135287, "clip_ratio/low_mean": 0.0004894386020168895, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011262504376645666, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2875.0, "completions/mean_length": 925.33935546875, "completions/mean_terminated_length": 531.4931030273438, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 12.634402332361516, "grad_norm": 0.32085034251213074, "learning_rate": 1e-06, "loss": -0.0268, "num_tokens": 738422820.0, "reward": 0.6339285969734192, "reward_std": 0.11103636026382446, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1352 }, { "clip_ratio/high_max": 0.0020135758277319837, "clip_ratio/high_mean": 0.00059482392953214, "clip_ratio/low_mean": 0.0003226257676942623, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009174496535706567, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3710.0, "completions/mean_length": 984.9342041015625, "completions/mean_terminated_length": 513.0758056640625, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 12.643731778425655, "grad_norm": 0.5963546633720398, "learning_rate": 1e-06, "loss": -0.0277, "num_tokens": 738916025.0, "reward": 0.6774553656578064, "reward_std": 0.1061122789978981, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1353 }, { "clip_ratio/high_max": 0.0018552585133875255, "clip_ratio/high_mean": 0.0006031259708834114, "clip_ratio/low_mean": 0.000258984800439066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008621107517683413, "completions/clipped_ratio": 0.0959821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1985.0, "completions/mean_length": 800.7310791015625, "completions/mean_terminated_length": 450.86297607421875, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 12.653061224489797, "grad_norm": 0.25760844349861145, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 739358560.0, "reward": 0.7165178656578064, "reward_std": 0.10968086868524551, "rewards/verify_math_reward/mean": 0.7165178656578064, "rewards/verify_math_reward/std": 0.4509401023387909, "step": 1354 }, { "clip_ratio/high_max": 0.0018420838496240322, "clip_ratio/high_mean": 0.0006966865385038545, "clip_ratio/low_mean": 0.0004199227610115486, "clip_ratio/low_min": 1.6141528249136172e-05, "clip_ratio/region_mean": 0.0011166092899657087, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2475.0, "completions/mean_length": 866.216552734375, "completions/mean_terminated_length": 545.2196044921875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 12.662390670553936, "grad_norm": 0.40403029322624207, "learning_rate": 1e-06, "loss": -0.0393, "num_tokens": 739896074.0, "reward": 0.6729910969734192, "reward_std": 0.15462279319763184, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1355 }, { "clip_ratio/high_max": 0.001981287838134449, "clip_ratio/high_mean": 0.0006317215529634268, "clip_ratio/low_mean": 0.00037931783617750625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001011039403238101, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2812.0, "completions/mean_length": 1033.76123046875, "completions/mean_terminated_length": 541.89892578125, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 12.671720116618076, "grad_norm": 0.2093334197998047, "learning_rate": 1e-06, "loss": -0.0726, "num_tokens": 740416092.0, "reward": 0.6049107313156128, "reward_std": 0.1397060751914978, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1356 }, { "clip_ratio/high_max": 0.0019730155836441554, "clip_ratio/high_mean": 0.0005656256280417438, "clip_ratio/low_mean": 0.00043394920339778764, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000999574829620542, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3459.0, "completions/mean_length": 1034.0648193359375, "completions/mean_terminated_length": 560.569580078125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 12.681049562682215, "grad_norm": 0.24663449823856354, "learning_rate": 1e-06, "loss": -0.033, "num_tokens": 740937878.0, "reward": 0.621651828289032, "reward_std": 0.14545360207557678, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.4852459728717804, "step": 1357 }, { "clip_ratio/high_max": 0.002267629883135669, "clip_ratio/high_mean": 0.0007278704006239423, "clip_ratio/low_mean": 0.0002739739552453102, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010018443445005687, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2830.0, "completions/mean_length": 850.068115234375, "completions/mean_terminated_length": 483.1366271972656, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 12.690379008746355, "grad_norm": 0.22954854369163513, "learning_rate": 1e-06, "loss": -0.0584, "num_tokens": 741413035.0, "reward": 0.7243303656578064, "reward_std": 0.12546339631080627, "rewards/verify_math_reward/mean": 0.7243303656578064, "rewards/verify_math_reward/std": 0.4471006691455841, "step": 1358 }, { "clip_ratio/high_max": 0.0023479087903979234, "clip_ratio/high_mean": 0.0006960446326047531, "clip_ratio/low_mean": 0.0004340857358329231, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001130130356614245, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3638.0, "completions/mean_length": 889.75341796875, "completions/mean_terminated_length": 518.419677734375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 12.699708454810496, "grad_norm": 0.2435080111026764, "learning_rate": 1e-06, "loss": -0.051, "num_tokens": 741919382.0, "reward": 0.7209821939468384, "reward_std": 0.11633063107728958, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1359 }, { "clip_ratio/high_max": 0.0017577246399014257, "clip_ratio/high_mean": 0.0007076831798258354, "clip_ratio/low_mean": 0.0003529586056174594, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001060641781805316, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 1940.0, "completions/mean_length": 1041.13623046875, "completions/mean_terminated_length": 508.636962890625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.709037900874636, "grad_norm": 0.6882129907608032, "learning_rate": 1e-06, "loss": -0.0663, "num_tokens": 742390752.0, "reward": 0.6328125, "reward_std": 0.16250787675380707, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1360 }, { "clip_ratio/high_max": 0.0015584094544465188, "clip_ratio/high_mean": 0.0005284538101477665, "clip_ratio/low_mean": 0.00036905520255459123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008975089986051898, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 801.7031860351562, "completions/mean_terminated_length": 491.98291015625, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 12.718367346938775, "grad_norm": 0.28293901681900024, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 742874982.0, "reward": 0.6584821939468384, "reward_std": 0.12644091248512268, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1361 }, { "clip_ratio/high_max": 0.0017015195444400888, "clip_ratio/high_mean": 0.00047585794163751416, "clip_ratio/low_mean": 0.000347914735584709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008237726760853548, "completions/clipped_ratio": 0.15625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3659.0, "completions/mean_length": 1136.7254638671875, "completions/mean_terminated_length": 588.7116088867188, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.727696793002915, "grad_norm": 0.20893487334251404, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 743411312.0, "reward": 0.5569196939468384, "reward_std": 0.10618643462657928, "rewards/verify_math_reward/mean": 0.5569196343421936, "rewards/verify_math_reward/std": 0.4970270097255707, "step": 1362 }, { "clip_ratio/high_max": 0.0019468862883513793, "clip_ratio/high_mean": 0.0006948173395358026, "clip_ratio/low_mean": 0.0003842596770482487, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00107907703568344, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3201.0, "completions/mean_length": 1069.0826416015625, "completions/mean_terminated_length": 550.7477416992188, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 12.737026239067056, "grad_norm": 0.2141713947057724, "learning_rate": 1e-06, "loss": -0.0741, "num_tokens": 743925954.0, "reward": 0.574776828289032, "reward_std": 0.14777731895446777, "rewards/verify_math_reward/mean": 0.5747767686843872, "rewards/verify_math_reward/std": 0.49465295672416687, "step": 1363 }, { "clip_ratio/high_max": 0.003098163040704094, "clip_ratio/high_mean": 0.000917768931685714, "clip_ratio/low_mean": 0.00045799038343830034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013757593078480568, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3554.0, "completions/mean_length": 963.3136596679688, "completions/mean_terminated_length": 551.9507446289062, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 12.746355685131196, "grad_norm": 0.25729474425315857, "learning_rate": 1e-06, "loss": -0.0563, "num_tokens": 744464371.0, "reward": 0.6339285969734192, "reward_std": 0.14466404914855957, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1364 }, { "clip_ratio/high_max": 0.0019777921406785026, "clip_ratio/high_mean": 0.0006717506166751264, "clip_ratio/low_mean": 0.0004016095765564387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010733601920946967, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2520.0, "completions/mean_length": 806.6495971679688, "completions/mean_terminated_length": 497.3944091796875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 12.755685131195335, "grad_norm": 0.26544907689094543, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 744951817.0, "reward": 0.7008928656578064, "reward_std": 0.1327543705701828, "rewards/verify_math_reward/mean": 0.7008928656578064, "rewards/verify_math_reward/std": 0.458122581243515, "step": 1365 }, { "clip_ratio/high_max": 0.0015571737858408596, "clip_ratio/high_mean": 0.0005872535057278583, "clip_ratio/low_mean": 0.0003519047841109568, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009391582843818469, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3847.0, "completions/mean_length": 1048.9754638671875, "completions/mean_terminated_length": 582.3140258789062, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 12.765014577259475, "grad_norm": 0.3987433910369873, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 745494731.0, "reward": 0.6171875, "reward_std": 0.14451251924037933, "rewards/verify_math_reward/mean": 0.6171875, "rewards/verify_math_reward/std": 0.4863446056842804, "step": 1366 }, { "clip_ratio/high_max": 0.0017320850420219358, "clip_ratio/high_mean": 0.0005912239066674374, "clip_ratio/low_mean": 0.00039086757260520244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009820914929150604, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3386.0, "completions/mean_length": 1037.1551513671875, "completions/mean_terminated_length": 518.030029296875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 12.774344023323614, "grad_norm": 99.93839263916016, "learning_rate": 1e-06, "loss": -0.053, "num_tokens": 745977630.0, "reward": 0.6328125, "reward_std": 0.1306820958852768, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1367 }, { "clip_ratio/high_max": 0.001804772560717538, "clip_ratio/high_mean": 0.0006167724113765871, "clip_ratio/low_mean": 0.00026702754712459864, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008837999321258394, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 972.40185546875, "completions/mean_terminated_length": 484.7174377441406, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 12.783673469387756, "grad_norm": 0.3347950875759125, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 746439430.0, "reward": 0.6729910969734192, "reward_std": 0.11866390705108643, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1368 }, { "clip_ratio/high_max": 0.002638830730575137, "clip_ratio/high_mean": 0.0008918509229260962, "clip_ratio/low_mean": 0.0003563135458080069, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012481644735089503, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3441.0, "completions/mean_length": 964.9297485351562, "completions/mean_terminated_length": 558.2459106445312, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 12.793002915451895, "grad_norm": 0.3066607415676117, "learning_rate": 1e-06, "loss": -0.0545, "num_tokens": 746983927.0, "reward": 0.6540178656578064, "reward_std": 0.15559779107570648, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1369 }, { "clip_ratio/high_max": 0.0014095676415308844, "clip_ratio/high_mean": 0.00046644072790513746, "clip_ratio/low_mean": 0.000292919801267999, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007593605332658626, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3962.0, "completions/mean_length": 906.2188110351562, "completions/mean_terminated_length": 558.8168334960938, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 12.802332361516035, "grad_norm": 0.21188370883464813, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 747526211.0, "reward": 0.7165178656578064, "reward_std": 0.11745856702327728, "rewards/verify_math_reward/mean": 0.7165178656578064, "rewards/verify_math_reward/std": 0.4509401023387909, "step": 1370 }, { "clip_ratio/high_max": 0.0019549962380551733, "clip_ratio/high_mean": 0.0007605743721796898, "clip_ratio/low_mean": 0.0003989964870925178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011595708892855328, "completions/clipped_ratio": 0.1529017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4065.0, "completions/mean_length": 1083.130615234375, "completions/mean_terminated_length": 539.3056640625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 12.811661807580174, "grad_norm": 0.24704080820083618, "learning_rate": 1e-06, "loss": -0.0632, "num_tokens": 748038552.0, "reward": 0.5848214626312256, "reward_std": 0.15064233541488647, "rewards/verify_math_reward/mean": 0.5848214030265808, "rewards/verify_math_reward/std": 0.49302801489830017, "step": 1371 }, { "clip_ratio/high_max": 0.0019636064862424973, "clip_ratio/high_mean": 0.0007228554368339246, "clip_ratio/low_mean": 0.0004627469643310178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011856024138978682, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 1033.9085693359375, "completions/mean_terminated_length": 560.38916015625, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 12.820991253644316, "grad_norm": 0.2643205225467682, "learning_rate": 1e-06, "loss": -0.0554, "num_tokens": 748566814.0, "reward": 0.613839328289032, "reward_std": 0.1306481957435608, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1372 }, { "clip_ratio/high_max": 0.0022867963889439125, "clip_ratio/high_mean": 0.0008140832942444831, "clip_ratio/low_mean": 0.000571572565149836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001385655839840183, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2339.0, "completions/mean_length": 1041.462158203125, "completions/mean_terminated_length": 546.2386474609375, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.830320699708455, "grad_norm": 0.8475359678268433, "learning_rate": 1e-06, "loss": -0.0317, "num_tokens": 749084676.0, "reward": 0.640625, "reward_std": 0.1382429599761963, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1373 }, { "clip_ratio/high_max": 0.002273448451887816, "clip_ratio/high_mean": 0.0008107989287964301, "clip_ratio/low_mean": 0.0003847020611829066, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011955010231758934, "completions/clipped_ratio": 0.1183035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4033.0, "completions/mean_length": 929.5770263671875, "completions/mean_terminated_length": 504.7151794433594, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 12.839650145772595, "grad_norm": 0.24407708644866943, "learning_rate": 1e-06, "loss": -0.1056, "num_tokens": 749570417.0, "reward": 0.6707589626312256, "reward_std": 0.14451363682746887, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1374 }, { "clip_ratio/high_max": 0.002060843755316455, "clip_ratio/high_mean": 0.0007619017005708884, "clip_ratio/low_mean": 0.0004417857421685767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001203687432280276, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4055.0, "completions/mean_length": 935.341552734375, "completions/mean_terminated_length": 502.1548156738281, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 12.848979591836734, "grad_norm": 0.29424676299095154, "learning_rate": 1e-06, "loss": -0.0413, "num_tokens": 750058099.0, "reward": 0.6707589626312256, "reward_std": 0.1353893280029297, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1375 }, { "clip_ratio/high_max": 0.00217004971273127, "clip_ratio/high_mean": 0.0008153443122864701, "clip_ratio/low_mean": 0.0004750150524159835, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012903593487862963, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2332.0, "completions/mean_length": 999.8449096679688, "completions/mean_terminated_length": 557.5369873046875, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 12.858309037900874, "grad_norm": 1.1741997003555298, "learning_rate": 1e-06, "loss": -0.0702, "num_tokens": 750583248.0, "reward": 0.6194196939468384, "reward_std": 0.16085955500602722, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1376 }, { "clip_ratio/high_max": 0.0015669029671698809, "clip_ratio/high_mean": 0.0006371952913468704, "clip_ratio/low_mean": 0.0003702701142174192, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010074654128402472, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3462.0, "completions/mean_length": 947.0256958007812, "completions/mean_terminated_length": 586.6952514648438, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 12.867638483965015, "grad_norm": 0.7590116858482361, "learning_rate": 1e-06, "loss": -0.0386, "num_tokens": 751139935.0, "reward": 0.6696428656578064, "reward_std": 0.1619868129491806, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1377 }, { "clip_ratio/high_max": 0.0018466759247530717, "clip_ratio/high_mean": 0.0005880852513655555, "clip_ratio/low_mean": 0.0005118016952110338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001099886947486084, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 1103.4576416015625, "completions/mean_terminated_length": 622.7901611328125, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 12.876967930029155, "grad_norm": 0.21544533967971802, "learning_rate": 1e-06, "loss": -0.055, "num_tokens": 751723785.0, "reward": 0.559151828289032, "reward_std": 0.13016286492347717, "rewards/verify_math_reward/mean": 0.5591517686843872, "rewards/verify_math_reward/std": 0.496766060590744, "step": 1378 }, { "clip_ratio/high_max": 0.0013301987582963193, "clip_ratio/high_mean": 0.0005247186995802622, "clip_ratio/low_mean": 0.0003667620244414138, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008914807040127926, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 812.9944458007812, "completions/mean_terminated_length": 547.6610717773438, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 12.886297376093294, "grad_norm": 0.25212574005126953, "learning_rate": 1e-06, "loss": -0.0311, "num_tokens": 752274788.0, "reward": 0.6640625, "reward_std": 0.1315172165632248, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1379 }, { "clip_ratio/high_max": 0.002105562853103038, "clip_ratio/high_mean": 0.0008208707604353549, "clip_ratio/low_mean": 0.0003263071916990157, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011471779544081073, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3309.0, "completions/mean_length": 946.364990234375, "completions/mean_terminated_length": 559.5676879882812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 12.895626822157434, "grad_norm": 0.24270500242710114, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 752807811.0, "reward": 0.6495535969734192, "reward_std": 0.14282114803791046, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1380 }, { "clip_ratio/high_max": 0.0019212423212593421, "clip_ratio/high_mean": 0.0007278142074937932, "clip_ratio/low_mean": 0.0003460776024439838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010738918390416075, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3627.0, "completions/mean_length": 1041.673095703125, "completions/mean_terminated_length": 564.8038940429688, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 12.904956268221575, "grad_norm": 0.28183022141456604, "learning_rate": 1e-06, "loss": -0.0621, "num_tokens": 753343046.0, "reward": 0.6328125, "reward_std": 0.14609551429748535, "rewards/verify_math_reward/mean": 0.6328125, "rewards/verify_math_reward/std": 0.48230743408203125, "step": 1381 }, { "clip_ratio/high_max": 0.0017274372112296987, "clip_ratio/high_mean": 0.0005665796907123877, "clip_ratio/low_mean": 0.0003050948271265952, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008716745269339299, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2056.0, "completions/mean_length": 832.8092041015625, "completions/mean_terminated_length": 530.3670654296875, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 12.914285714285715, "grad_norm": 0.27039772272109985, "learning_rate": 1e-06, "loss": -0.0429, "num_tokens": 753858491.0, "reward": 0.7020089626312256, "reward_std": 0.10562372952699661, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1382 }, { "clip_ratio/high_max": 0.0020260836972738616, "clip_ratio/high_mean": 0.0008750749730097596, "clip_ratio/low_mean": 0.00044082958811486606, "clip_ratio/low_min": 1.582679215061944e-05, "clip_ratio/region_mean": 0.0013159045302018058, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3756.0, "completions/mean_length": 869.013427734375, "completions/mean_terminated_length": 517.5593872070312, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 12.923615160349854, "grad_norm": 0.5587045550346375, "learning_rate": 1e-06, "loss": -0.0604, "num_tokens": 754374567.0, "reward": 0.7098214626312256, "reward_std": 0.16119515895843506, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 1383 }, { "clip_ratio/high_max": 0.0018916974077001214, "clip_ratio/high_mean": 0.0007225484514492564, "clip_ratio/low_mean": 0.00046064554999247775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001183193988254061, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 997.6082763671875, "completions/mean_terminated_length": 550.45849609375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 12.932944606413994, "grad_norm": 1.0377944707870483, "learning_rate": 1e-06, "loss": -0.0768, "num_tokens": 754898000.0, "reward": 0.6707589626312256, "reward_std": 0.14846131205558777, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1384 }, { "clip_ratio/high_max": 0.0018531905989220832, "clip_ratio/high_mean": 0.0006839078305347357, "clip_ratio/low_mean": 0.00046031783040234586, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011442256582085975, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3490.0, "completions/mean_length": 1012.8058471679688, "completions/mean_terminated_length": 581.3155517578125, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 12.942274052478133, "grad_norm": 0.3049590587615967, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 755444122.0, "reward": 0.6696428656578064, "reward_std": 0.15999305248260498, "rewards/verify_math_reward/mean": 0.6696428656578064, "rewards/verify_math_reward/std": 0.47060438990592957, "step": 1385 }, { "clip_ratio/high_max": 0.0019815594714600593, "clip_ratio/high_mean": 0.0007506612710130867, "clip_ratio/low_mean": 0.0005363260279409587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001286987288040109, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2481.0, "completions/mean_length": 996.0480346679688, "completions/mean_terminated_length": 562.2124633789062, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 12.951603498542275, "grad_norm": 0.29062795639038086, "learning_rate": 1e-06, "loss": -0.053, "num_tokens": 755972237.0, "reward": 0.6227678656578064, "reward_std": 0.17149202525615692, "rewards/verify_math_reward/mean": 0.6227678656578064, "rewards/verify_math_reward/std": 0.4849644899368286, "step": 1386 }, { "clip_ratio/high_max": 0.0021974281225993764, "clip_ratio/high_mean": 0.0007501259115088033, "clip_ratio/low_mean": 0.0005105942154841614, "clip_ratio/low_min": 2.6047093342640437e-05, "clip_ratio/region_mean": 0.001260720135178417, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1612.0, "completions/mean_length": 890.4129638671875, "completions/mean_terminated_length": 487.70098876953125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 12.960932944606414, "grad_norm": 0.2648910880088806, "learning_rate": 1e-06, "loss": -0.0517, "num_tokens": 756457031.0, "reward": 0.6551339626312256, "reward_std": 0.14507775008678436, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1387 }, { "clip_ratio/high_max": 0.0019278808067610953, "clip_ratio/high_mean": 0.0006655543675151421, "clip_ratio/low_mean": 0.0002514504044484056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009170047651423374, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2854.0, "completions/mean_length": 891.9263916015625, "completions/mean_terminated_length": 502.9461975097656, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 12.970262390670554, "grad_norm": 0.35309767723083496, "learning_rate": 1e-06, "loss": -0.0327, "num_tokens": 756946101.0, "reward": 0.6462053656578064, "reward_std": 0.10867238789796829, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1388 }, { "clip_ratio/high_max": 0.0013791169112664647, "clip_ratio/high_mean": 0.00040248090772365686, "clip_ratio/low_mean": 0.00025305602434855246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006555369454872562, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 1010.2020263671875, "completions/mean_terminated_length": 519.1889038085938, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 12.979591836734693, "grad_norm": 0.2338443100452423, "learning_rate": 1e-06, "loss": -0.0402, "num_tokens": 757438650.0, "reward": 0.6261160969734192, "reward_std": 0.09333452582359314, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1389 }, { "clip_ratio/high_max": 0.0017437691567465663, "clip_ratio/high_mean": 0.0005722645419155015, "clip_ratio/low_mean": 0.00034574814435472945, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009180126762657892, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3830.0, "completions/mean_length": 1025.6082763671875, "completions/mean_terminated_length": 573.501953125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 12.988921282798835, "grad_norm": 0.22266581654548645, "learning_rate": 1e-06, "loss": -0.0766, "num_tokens": 757975123.0, "reward": 0.6417410969734192, "reward_std": 0.14158402383327484, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975659370422363, "step": 1390 }, { "clip_ratio/high_max": 0.0015518709733441938, "clip_ratio/high_mean": 0.0005661605073328246, "clip_ratio/low_mean": 0.0002864747480089136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008526352467015386, "completions/clipped_ratio": 0.07670454545454541, "completions/max_length": 4096.0, "completions/max_terminated_length": 2614.0, "completions/mean_length": 853.1704711914062, "completions/mean_terminated_length": 583.7661743164062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 12.998250728862974, "grad_norm": 0.23627407848834991, "learning_rate": 1e-06, "loss": -0.0371, "num_tokens": 758497122.0, "reward": 0.6551339626312256, "reward_std": 0.11727241426706314, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1391 }, { "clip_ratio/high_max": 0.0015648082808183972, "clip_ratio/high_mean": 0.0004424165472300956, "clip_ratio/low_mean": 0.00036852742709925224, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008109439877443947, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3467.0, "completions/mean_length": 931.3951416015625, "completions/mean_terminated_length": 538.3011474609375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.00932944606414, "grad_norm": 0.20707617700099945, "learning_rate": 1e-06, "loss": -0.0376, "num_tokens": 759014796.0, "reward": 0.625, "reward_std": 0.11050571501255035, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1392 }, { "clip_ratio/high_max": 0.001504514177213423, "clip_ratio/high_mean": 0.0005346524048945867, "clip_ratio/low_mean": 0.0003906382294189825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009252906565961894, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3606.0, "completions/mean_length": 900.5045166015625, "completions/mean_terminated_length": 591.5153198242188, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 13.018658892128279, "grad_norm": 0.3734883666038513, "learning_rate": 1e-06, "loss": -0.0196, "num_tokens": 759577432.0, "reward": 0.7064732313156128, "reward_std": 0.13042137026786804, "rewards/verify_math_reward/mean": 0.7064732313156128, "rewards/verify_math_reward/std": 0.4556320011615753, "step": 1393 }, { "clip_ratio/high_max": 0.0021092421702633146, "clip_ratio/high_mean": 0.0007486360846087337, "clip_ratio/low_mean": 0.0004456653541637934, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011943014796997886, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2815.0, "completions/mean_length": 1086.9754638671875, "completions/mean_terminated_length": 571.70458984375, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 13.02798833819242, "grad_norm": 0.24691501259803772, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 760105314.0, "reward": 0.5725446939468384, "reward_std": 0.15541164577007294, "rewards/verify_math_reward/mean": 0.5725446343421936, "rewards/verify_math_reward/std": 0.49498558044433594, "step": 1394 }, { "clip_ratio/high_max": 0.0016860339765116805, "clip_ratio/high_mean": 0.000513932033754827, "clip_ratio/low_mean": 0.00025348205599584617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007674140888411785, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2764.0, "completions/mean_length": 901.1295166015625, "completions/mean_terminated_length": 508.7769470214844, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 13.03731778425656, "grad_norm": 0.23121798038482666, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 760598846.0, "reward": 0.668526828289032, "reward_std": 0.10708937793970108, "rewards/verify_math_reward/mean": 0.6685267686843872, "rewards/verify_math_reward/std": 0.4710056483745575, "step": 1395 }, { "clip_ratio/high_max": 0.001584160105267074, "clip_ratio/high_mean": 0.0005649628828905406, "clip_ratio/low_mean": 0.0003471491600066656, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000912112052901648, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3119.0, "completions/mean_length": 912.9933471679688, "completions/mean_terminated_length": 531.032470703125, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 13.0466472303207, "grad_norm": 0.2560865581035614, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 761115712.0, "reward": 0.6473214626312256, "reward_std": 0.13215592503547668, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1396 }, { "clip_ratio/high_max": 0.0018003426266659517, "clip_ratio/high_mean": 0.0005776014768343884, "clip_ratio/low_mean": 0.00042263580576218374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010002372819144512, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2059.0, "completions/mean_length": 826.4096069335938, "completions/mean_terminated_length": 510.2558288574219, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.055976676384839, "grad_norm": 0.23720556497573853, "learning_rate": 1e-06, "loss": -0.0284, "num_tokens": 761625543.0, "reward": 0.6986607313156128, "reward_std": 0.11468091607093811, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1397 }, { "clip_ratio/high_max": 0.0019789553443843033, "clip_ratio/high_mean": 0.0007179691847341019, "clip_ratio/low_mean": 0.00040239865552393894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011203678404854145, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 849.7957763671875, "completions/mean_terminated_length": 527.1668701171875, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 13.06530612244898, "grad_norm": 0.2802946865558624, "learning_rate": 1e-06, "loss": -0.021, "num_tokens": 762133072.0, "reward": 0.7332589626312256, "reward_std": 0.1350441575050354, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425029158592224, "step": 1398 }, { "clip_ratio/high_max": 0.0019997288036393, "clip_ratio/high_mean": 0.000648982097118278, "clip_ratio/low_mean": 0.00043257709376121056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001081559184967773, "completions/clipped_ratio": 0.0993303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 900.3817138671875, "completions/mean_terminated_length": 547.9529418945312, "completions/min_length": 163.0, "completions/min_terminated_length": 163.0, "epoch": 13.07463556851312, "grad_norm": 0.24073903262615204, "learning_rate": 1e-06, "loss": -0.0426, "num_tokens": 762663694.0, "reward": 0.6729910969734192, "reward_std": 0.13624738156795502, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1399 }, { "clip_ratio/high_max": 0.002562109522841638, "clip_ratio/high_mean": 0.0007564049938082462, "clip_ratio/low_mean": 0.00029234775456643547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010487527415534714, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3513.0, "completions/mean_length": 989.154052734375, "completions/mean_terminated_length": 549.842041015625, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 13.08396501457726, "grad_norm": 0.4724392592906952, "learning_rate": 1e-06, "loss": -0.0522, "num_tokens": 763192256.0, "reward": 0.6774553656578064, "reward_std": 0.11617907881736755, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1400 }, { "clip_ratio/high_max": 0.0011260485043749213, "clip_ratio/high_mean": 0.00043029518383264076, "clip_ratio/low_mean": 0.00026256952924086363, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006928647289896617, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3987.0, "completions/mean_length": 935.27685546875, "completions/mean_terminated_length": 542.6649780273438, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 13.093294460641399, "grad_norm": 0.20758171379566193, "learning_rate": 1e-06, "loss": -0.041, "num_tokens": 763706592.0, "reward": 0.6808035969734192, "reward_std": 0.10987518727779388, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.46642565727233887, "step": 1401 }, { "clip_ratio/high_max": 0.0015355567702499684, "clip_ratio/high_mean": 0.0005331959373506834, "clip_ratio/low_mean": 0.0002480945679508295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007812905132595915, "completions/clipped_ratio": 0.0904017857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3594.0, "completions/mean_length": 866.8370971679688, "completions/mean_terminated_length": 545.90185546875, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 13.102623906705539, "grad_norm": 0.2250635325908661, "learning_rate": 1e-06, "loss": -0.0561, "num_tokens": 764243726.0, "reward": 0.7109375596046448, "reward_std": 0.1287727802991867, "rewards/verify_math_reward/mean": 0.7109375, "rewards/verify_math_reward/std": 0.45358020067214966, "step": 1402 }, { "clip_ratio/high_max": 0.0014361132161866408, "clip_ratio/high_mean": 0.0005224953333708982, "clip_ratio/low_mean": 0.00032753702953414177, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008500323574480717, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 1042.0513916015625, "completions/mean_terminated_length": 533.0599365234375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 13.11195335276968, "grad_norm": 0.20067481696605682, "learning_rate": 1e-06, "loss": -0.0431, "num_tokens": 764748476.0, "reward": 0.6149553656578064, "reward_std": 0.10919302701950073, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1403 }, { "clip_ratio/high_max": 0.0017143203658633865, "clip_ratio/high_mean": 0.0006862149748485535, "clip_ratio/low_mean": 0.0004324463725424721, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011186613301106263, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3250.0, "completions/mean_length": 987.5670166015625, "completions/mean_terminated_length": 538.9680786132812, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 13.12128279883382, "grad_norm": 0.250449538230896, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 765262008.0, "reward": 0.6484375, "reward_std": 0.14350220561027527, "rewards/verify_math_reward/mean": 0.6484375, "rewards/verify_math_reward/std": 0.4777248501777649, "step": 1404 }, { "clip_ratio/high_max": 0.00151262997314916, "clip_ratio/high_mean": 0.0005685164214810356, "clip_ratio/low_mean": 0.0004395626838231692, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010080791180371307, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3900.0, "completions/mean_length": 1107.204345703125, "completions/mean_terminated_length": 613.60595703125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 13.130612244897959, "grad_norm": 0.3064783811569214, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 765829791.0, "reward": 0.6004464626312256, "reward_std": 0.13339374959468842, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1405 }, { "clip_ratio/high_max": 0.002421735345706111, "clip_ratio/high_mean": 0.0008009010516616399, "clip_ratio/low_mean": 0.00047693476335552987, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012778357995557599, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3001.0, "completions/mean_length": 982.33935546875, "completions/mean_terminated_length": 560.0811157226562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.139941690962099, "grad_norm": 0.275840699672699, "learning_rate": 1e-06, "loss": -0.0446, "num_tokens": 766365839.0, "reward": 0.6462053656578064, "reward_std": 0.14440374076366425, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1406 }, { "clip_ratio/high_max": 0.0023350001429207623, "clip_ratio/high_mean": 0.0007682941577513702, "clip_ratio/low_mean": 0.0004151979937887518, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011834921679110266, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3665.0, "completions/mean_length": 827.6417846679688, "completions/mean_terminated_length": 529.0706787109375, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.14927113702624, "grad_norm": 0.24996188282966614, "learning_rate": 1e-06, "loss": -0.0533, "num_tokens": 766881086.0, "reward": 0.7254464626312256, "reward_std": 0.1504889577627182, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1407 }, { "clip_ratio/high_max": 0.0017211684680660255, "clip_ratio/high_mean": 0.0005624990271826391, "clip_ratio/low_mean": 0.0002965925878015696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008590916058892617, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2450.0, "completions/mean_length": 923.786865234375, "completions/mean_terminated_length": 560.7972412109375, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 13.15860058309038, "grad_norm": 0.6004595756530762, "learning_rate": 1e-06, "loss": -0.0213, "num_tokens": 767425871.0, "reward": 0.645089328289032, "reward_std": 0.11963889002799988, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1408 }, { "clip_ratio/high_max": 0.001921835766552249, "clip_ratio/high_mean": 0.0005753110599471256, "clip_ratio/low_mean": 0.0003687656890178914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009440767371415859, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2979.0, "completions/mean_length": 973.693115234375, "completions/mean_terminated_length": 536.72900390625, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 13.167930029154519, "grad_norm": 9.168828964233398, "learning_rate": 1e-06, "loss": -0.0427, "num_tokens": 767954332.0, "reward": 0.590401828289032, "reward_std": 0.12651577591896057, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1409 }, { "clip_ratio/high_max": 0.0019108351189061068, "clip_ratio/high_mean": 0.0007167610619944753, "clip_ratio/low_mean": 0.00038995804061414674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011067190862377174, "completions/clipped_ratio": 0.0803571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2387.0, "completions/mean_length": 803.6808471679688, "completions/mean_terminated_length": 516.00244140625, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 13.177259475218658, "grad_norm": 0.26358672976493835, "learning_rate": 1e-06, "loss": -0.0429, "num_tokens": 768473718.0, "reward": 0.7667410969734192, "reward_std": 0.12482510507106781, "rewards/verify_math_reward/mean": 0.7667410969734192, "rewards/verify_math_reward/std": 0.42314186692237854, "step": 1410 }, { "clip_ratio/high_max": 0.0019463468961475883, "clip_ratio/high_mean": 0.0006621730399274384, "clip_ratio/low_mean": 0.00044918252797288005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011113555610791082, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 2986.0, "completions/mean_length": 893.7109985351562, "completions/mean_terminated_length": 531.7130126953125, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 13.186588921282798, "grad_norm": 0.2778402864933014, "learning_rate": 1e-06, "loss": -0.0847, "num_tokens": 768986131.0, "reward": 0.7254464626312256, "reward_std": 0.15094542503356934, "rewards/verify_math_reward/mean": 0.7254464030265808, "rewards/verify_math_reward/std": 0.4465382993221283, "step": 1411 }, { "clip_ratio/high_max": 0.0022086741228122264, "clip_ratio/high_mean": 0.00074517638040561, "clip_ratio/low_mean": 0.0005234462605585577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012686226509686094, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3485.0, "completions/mean_length": 1080.279052734375, "completions/mean_terminated_length": 568.4725952148438, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 13.19591836734694, "grad_norm": 0.2508910000324249, "learning_rate": 1e-06, "loss": -0.0595, "num_tokens": 769516429.0, "reward": 0.6049107313156128, "reward_std": 0.15988317131996155, "rewards/verify_math_reward/mean": 0.6049107313156128, "rewards/verify_math_reward/std": 0.48914292454719543, "step": 1412 }, { "clip_ratio/high_max": 0.0021129219530848786, "clip_ratio/high_mean": 0.0008120713246171363, "clip_ratio/low_mean": 0.00045953432436363073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012716056262433995, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3032.0, "completions/mean_length": 909.0960083007812, "completions/mean_terminated_length": 508.73114013671875, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.205247813411079, "grad_norm": 0.4871099591255188, "learning_rate": 1e-06, "loss": -0.0572, "num_tokens": 770010651.0, "reward": 0.6718750596046448, "reward_std": 0.14842741191387177, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1413 }, { "clip_ratio/high_max": 0.0020849489519605413, "clip_ratio/high_mean": 0.0008023624959605513, "clip_ratio/low_mean": 0.0004160077241976978, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012183701910544187, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 1939.0, "completions/mean_length": 979.4888916015625, "completions/mean_terminated_length": 534.27294921875, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 13.214577259475218, "grad_norm": 0.29521510004997253, "learning_rate": 1e-06, "loss": -0.0411, "num_tokens": 770518217.0, "reward": 0.6272321939468384, "reward_std": 0.15331120789051056, "rewards/verify_math_reward/mean": 0.6272321343421936, "rewards/verify_math_reward/std": 0.4838111698627472, "step": 1414 }, { "clip_ratio/high_max": 0.0017796547799662221, "clip_ratio/high_mean": 0.0006175994385557715, "clip_ratio/low_mean": 0.0004503474581269984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010679468869057018, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3553.0, "completions/mean_length": 942.5335083007812, "completions/mean_terminated_length": 559.6971435546875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 13.223906705539358, "grad_norm": 0.338432252407074, "learning_rate": 1e-06, "loss": -0.0538, "num_tokens": 771061911.0, "reward": 0.6618303656578064, "reward_std": 0.14451110363006592, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1415 }, { "clip_ratio/high_max": 0.0017426106714992784, "clip_ratio/high_mean": 0.0005744925165345194, "clip_ratio/low_mean": 0.0002698533530747227, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008443458555120742, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2392.0, "completions/mean_length": 861.1127319335938, "completions/mean_terminated_length": 499.8970031738281, "completions/min_length": 100.0, "completions/min_terminated_length": 100.0, "epoch": 13.2332361516035, "grad_norm": 0.9795376658439636, "learning_rate": 1e-06, "loss": -0.049, "num_tokens": 771560692.0, "reward": 0.6417410969734192, "reward_std": 0.11314068734645844, "rewards/verify_math_reward/mean": 0.6417410969734192, "rewards/verify_math_reward/std": 0.47975656390190125, "step": 1416 }, { "clip_ratio/high_max": 0.0021070267575851176, "clip_ratio/high_mean": 0.000787390230470919, "clip_ratio/low_mean": 0.0003916053956345422, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011789956006396096, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2024.0, "completions/mean_length": 901.21435546875, "completions/mean_terminated_length": 566.3723754882812, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.242565597667639, "grad_norm": 0.6604495048522949, "learning_rate": 1e-06, "loss": -0.0287, "num_tokens": 772100260.0, "reward": 0.676339328289032, "reward_std": 0.13970720767974854, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 1417 }, { "clip_ratio/high_max": 0.001792183225916233, "clip_ratio/high_mean": 0.0006919041243236279, "clip_ratio/low_mean": 0.0004517107954598032, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011436149397923145, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3738.0, "completions/mean_length": 930.6551513671875, "completions/mean_terminated_length": 568.4514770507812, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 13.251895043731778, "grad_norm": 0.2922399044036865, "learning_rate": 1e-06, "loss": -0.0396, "num_tokens": 772650519.0, "reward": 0.6830357313156128, "reward_std": 0.14564089477062225, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1418 }, { "clip_ratio/high_max": 0.001605254807145684, "clip_ratio/high_mean": 0.0005620021677259501, "clip_ratio/low_mean": 0.00038511040793309803, "clip_ratio/low_min": 2.6354311557952315e-05, "clip_ratio/region_mean": 0.0009471125977142947, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2341.0, "completions/mean_length": 880.357177734375, "completions/mean_terminated_length": 590.8710327148438, "completions/min_length": 173.0, "completions/min_terminated_length": 173.0, "epoch": 13.261224489795918, "grad_norm": 0.23736920952796936, "learning_rate": 1e-06, "loss": -0.0526, "num_tokens": 773225039.0, "reward": 0.652901828289032, "reward_std": 0.14560584723949432, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 1419 }, { "clip_ratio/high_max": 0.0021306603885022923, "clip_ratio/high_mean": 0.0007419451176247094, "clip_ratio/low_mean": 0.00027688059344654903, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010188257110712584, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2853.0, "completions/mean_length": 880.5636596679688, "completions/mean_terminated_length": 494.71124267578125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 13.270553935860057, "grad_norm": 0.2683456242084503, "learning_rate": 1e-06, "loss": -0.0686, "num_tokens": 773715688.0, "reward": 0.723214328289032, "reward_std": 0.12723001837730408, "rewards/verify_math_reward/mean": 0.7232142686843872, "rewards/verify_math_reward/std": 0.44765952229499817, "step": 1420 }, { "clip_ratio/high_max": 0.0016470260743517429, "clip_ratio/high_mean": 0.000479669638480118, "clip_ratio/low_mean": 0.00032177978164327214, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00080144939784077, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3451.0, "completions/mean_length": 834.5413208007812, "completions/mean_terminated_length": 519.173828125, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 13.279883381924199, "grad_norm": 0.5516514182090759, "learning_rate": 1e-06, "loss": -0.0316, "num_tokens": 774220701.0, "reward": 0.6852678656578064, "reward_std": 0.08503435552120209, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.4646684527397156, "step": 1421 }, { "clip_ratio/high_max": 0.0023291828474611975, "clip_ratio/high_mean": 0.0007562954651803011, "clip_ratio/low_mean": 0.0004190416448182077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011753371181839611, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3014.0, "completions/mean_length": 956.0960083007812, "completions/mean_terminated_length": 566.0702514648438, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 13.289212827988338, "grad_norm": 0.2889426648616791, "learning_rate": 1e-06, "loss": -0.0573, "num_tokens": 774763931.0, "reward": 0.6752232313156128, "reward_std": 0.15811371803283691, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1422 }, { "clip_ratio/high_max": 0.0013299583024490857, "clip_ratio/high_mean": 0.00039771064666638267, "clip_ratio/low_mean": 0.00024158328778867144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006392939449142432, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2034.0, "completions/mean_length": 812.0636596679688, "completions/mean_terminated_length": 520.7788696289062, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 13.298542274052478, "grad_norm": 0.263927161693573, "learning_rate": 1e-06, "loss": -0.0161, "num_tokens": 775281860.0, "reward": 0.7098214626312256, "reward_std": 0.09055617451667786, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 1423 }, { "clip_ratio/high_max": 0.0015924356375762727, "clip_ratio/high_mean": 0.0004741024454233411, "clip_ratio/low_mean": 0.0002808431995617866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007549456604465377, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2852.0, "completions/mean_length": 905.3761596679688, "completions/mean_terminated_length": 531.4127197265625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 13.307871720116617, "grad_norm": 0.18549026548862457, "learning_rate": 1e-06, "loss": -0.0511, "num_tokens": 775801477.0, "reward": 0.6149553656578064, "reward_std": 0.10626451671123505, "rewards/verify_math_reward/mean": 0.6149553656578064, "rewards/verify_math_reward/std": 0.4868776500225067, "step": 1424 }, { "clip_ratio/high_max": 0.0018041823350358754, "clip_ratio/high_mean": 0.0006176234282975201, "clip_ratio/low_mean": 0.00038572605035369634, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001003349490929395, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4017.0, "completions/mean_length": 970.0201416015625, "completions/mean_terminated_length": 568.44580078125, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 13.317201166180759, "grad_norm": 0.30052486062049866, "learning_rate": 1e-06, "loss": -0.0686, "num_tokens": 776333047.0, "reward": 0.5814732313156128, "reward_std": 0.15950380265712738, "rewards/verify_math_reward/mean": 0.5814732313156128, "rewards/verify_math_reward/std": 0.4935929775238037, "step": 1425 }, { "clip_ratio/high_max": 0.0017893044496304356, "clip_ratio/high_mean": 0.0005078815574961482, "clip_ratio/low_mean": 0.0004161108709013206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009239924329449423, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3203.0, "completions/mean_length": 864.2210083007812, "completions/mean_terminated_length": 551.723388671875, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 13.326530612244898, "grad_norm": 0.21070371568202972, "learning_rate": 1e-06, "loss": -0.0319, "num_tokens": 776868381.0, "reward": 0.652901828289032, "reward_std": 0.11655885726213455, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 1426 }, { "clip_ratio/high_max": 0.0019182517717126757, "clip_ratio/high_mean": 0.0006882018933538347, "clip_ratio/low_mean": 0.00025237160048163787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009405734635947738, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3832.0, "completions/mean_length": 992.2277221679688, "completions/mean_terminated_length": 580.2225341796875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 13.335860058309038, "grad_norm": 0.26085469126701355, "learning_rate": 1e-06, "loss": -0.0508, "num_tokens": 777414201.0, "reward": 0.6540178656578064, "reward_std": 0.12963788211345673, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1427 }, { "clip_ratio/high_max": 0.0021280049040797167, "clip_ratio/high_mean": 0.0006637554033659399, "clip_ratio/low_mean": 0.0003439082952354511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010076637081510853, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3688.0, "completions/mean_length": 857.2545166015625, "completions/mean_terminated_length": 569.9781494140625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 13.345189504373177, "grad_norm": 0.2858824133872986, "learning_rate": 1e-06, "loss": -0.0365, "num_tokens": 777971573.0, "reward": 0.6718750596046448, "reward_std": 0.14609484374523163, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1428 }, { "clip_ratio/high_max": 0.001930807720782468, "clip_ratio/high_mean": 0.0007317347431126109, "clip_ratio/low_mean": 0.0003103565891251492, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010420913240523078, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 875.6629638671875, "completions/mean_terminated_length": 529.3473510742188, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 13.354518950437317, "grad_norm": 0.4686256945133209, "learning_rate": 1e-06, "loss": -0.0564, "num_tokens": 778481751.0, "reward": 0.7020089626312256, "reward_std": 0.129900723695755, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1429 }, { "clip_ratio/high_max": 0.0016534370079170913, "clip_ratio/high_mean": 0.0006498158982140012, "clip_ratio/low_mean": 0.0005216361232669442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011714520296663977, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3980.0, "completions/mean_length": 910.4777221679688, "completions/mean_terminated_length": 528.2149658203125, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 13.363848396501458, "grad_norm": 0.27099400758743286, "learning_rate": 1e-06, "loss": -0.0518, "num_tokens": 778984387.0, "reward": 0.6662946939468384, "reward_std": 0.14522719383239746, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1430 }, { "clip_ratio/high_max": 0.0016680276712577324, "clip_ratio/high_mean": 0.0005007668742109672, "clip_ratio/low_mean": 0.0003237281475776399, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000824495011329418, "completions/clipped_ratio": 0.0970982142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2298.0, "completions/mean_length": 824.075927734375, "completions/mean_terminated_length": 472.2126159667969, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 13.373177842565598, "grad_norm": 0.22796332836151123, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 779461503.0, "reward": 0.6986607313156128, "reward_std": 0.11250059306621552, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1431 }, { "clip_ratio/high_max": 0.001909197340864921, "clip_ratio/high_mean": 0.0006651566982327495, "clip_ratio/low_mean": 0.0002979744176627719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009631311258999631, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2665.0, "completions/mean_length": 899.9699096679688, "completions/mean_terminated_length": 551.8873901367188, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 13.382507288629737, "grad_norm": 0.3207937479019165, "learning_rate": 1e-06, "loss": -0.0344, "num_tokens": 779999436.0, "reward": 0.6618303656578064, "reward_std": 0.13004270195960999, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1432 }, { "clip_ratio/high_max": 0.0013709939667023718, "clip_ratio/high_mean": 0.0004886877450189786, "clip_ratio/low_mean": 0.000465553274807462, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009542410625726916, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 1783.0, "completions/mean_length": 836.2388916015625, "completions/mean_terminated_length": 521.0355224609375, "completions/min_length": 158.0, "completions/min_terminated_length": 158.0, "epoch": 13.391836734693877, "grad_norm": 0.24078159034252167, "learning_rate": 1e-06, "loss": -0.0442, "num_tokens": 780519594.0, "reward": 0.6964285969734192, "reward_std": 0.13624556362628937, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600566029548645, "step": 1433 }, { "clip_ratio/high_max": 0.0018737911959760822, "clip_ratio/high_mean": 0.0007206155978565221, "clip_ratio/low_mean": 0.00024201836117754283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000962633987001027, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3279.0, "completions/mean_length": 764.630615234375, "completions/mean_terminated_length": 495.388427734375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.401166180758018, "grad_norm": 0.33389750123023987, "learning_rate": 1e-06, "loss": -0.0295, "num_tokens": 781022647.0, "reward": 0.7209821939468384, "reward_std": 0.12906630337238312, "rewards/verify_math_reward/mean": 0.7209821343421936, "rewards/verify_math_reward/std": 0.448766827583313, "step": 1434 }, { "clip_ratio/high_max": 0.0017861463347799145, "clip_ratio/high_mean": 0.0006591451729036635, "clip_ratio/low_mean": 0.00026640965461410815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009255548211513087, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 910.3895263671875, "completions/mean_terminated_length": 528.1162109375, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.410495626822158, "grad_norm": 0.2597056031227112, "learning_rate": 1e-06, "loss": -0.0691, "num_tokens": 781524348.0, "reward": 0.7366071939468384, "reward_std": 0.13685287535190582, "rewards/verify_math_reward/mean": 0.7366071343421936, "rewards/verify_math_reward/std": 0.44071969389915466, "step": 1435 }, { "clip_ratio/high_max": 0.0018368865803495282, "clip_ratio/high_mean": 0.0007173138292273507, "clip_ratio/low_mean": 0.00039299861941799463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001110312430682825, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3879.0, "completions/mean_length": 898.6707763671875, "completions/mean_terminated_length": 550.44677734375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.419825072886297, "grad_norm": 0.2400037795305252, "learning_rate": 1e-06, "loss": -0.0517, "num_tokens": 782067797.0, "reward": 0.6428571939468384, "reward_std": 0.14496827125549316, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 1436 }, { "clip_ratio/high_max": 0.0019859401436406188, "clip_ratio/high_mean": 0.0006794103574065957, "clip_ratio/low_mean": 0.00034261595601492445, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001022026333885151, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3113.0, "completions/mean_length": 965.23779296875, "completions/mean_terminated_length": 576.3475341796875, "completions/min_length": 159.0, "completions/min_terminated_length": 159.0, "epoch": 13.429154518950437, "grad_norm": 0.18977214395999908, "learning_rate": 1e-06, "loss": -0.0525, "num_tokens": 782613962.0, "reward": 0.6428571939468384, "reward_std": 0.15405938029289246, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.4794250428676605, "step": 1437 }, { "clip_ratio/high_max": 0.0012364787071419414, "clip_ratio/high_mean": 0.0004290785373086692, "clip_ratio/low_mean": 0.00027034244885726366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006994209816184593, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3631.0, "completions/mean_length": 916.7980346679688, "completions/mean_terminated_length": 530.8372802734375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 13.438483965014576, "grad_norm": 1.683428406715393, "learning_rate": 1e-06, "loss": -0.0624, "num_tokens": 783129669.0, "reward": 0.676339328289032, "reward_std": 0.10479705035686493, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335985660553, "step": 1438 }, { "clip_ratio/high_max": 0.0017593130869499873, "clip_ratio/high_mean": 0.0006590482280444121, "clip_ratio/low_mean": 0.00024789660892565735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009069448442460271, "completions/clipped_ratio": 0.1495535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3133.0, "completions/mean_length": 1085.7109375, "completions/mean_terminated_length": 556.342529296875, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 13.447813411078718, "grad_norm": 0.2715403139591217, "learning_rate": 1e-06, "loss": -0.0643, "num_tokens": 783649138.0, "reward": 0.5970982313156128, "reward_std": 0.13842658698558807, "rewards/verify_math_reward/mean": 0.5970982313156128, "rewards/verify_math_reward/std": 0.49075525999069214, "step": 1439 }, { "clip_ratio/high_max": 0.001605607907549711, "clip_ratio/high_mean": 0.0004410829133121297, "clip_ratio/low_mean": 0.00031743177214593743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007585147104691714, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2544.0, "completions/mean_length": 927.935302734375, "completions/mean_terminated_length": 552.197265625, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 13.457142857142857, "grad_norm": 0.19035007059574127, "learning_rate": 1e-06, "loss": -0.0495, "num_tokens": 784174800.0, "reward": 0.6662946939468384, "reward_std": 0.09517853707075119, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1440 }, { "clip_ratio/high_max": 0.0015615803604305256, "clip_ratio/high_mean": 0.0005633133860101225, "clip_ratio/low_mean": 0.0002843796905835916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008476930634060409, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 1000.1484985351562, "completions/mean_terminated_length": 606.8389892578125, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.466472303206997, "grad_norm": 0.2348775863647461, "learning_rate": 1e-06, "loss": -0.0628, "num_tokens": 784748941.0, "reward": 0.6350446939468384, "reward_std": 0.12291326373815536, "rewards/verify_math_reward/mean": 0.6350446343421936, "rewards/verify_math_reward/std": 0.481686532497406, "step": 1441 }, { "clip_ratio/high_max": 0.0015513555008510593, "clip_ratio/high_mean": 0.00046216016926337034, "clip_ratio/low_mean": 0.00034409974614391103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008062599245022284, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 928.7332763671875, "completions/mean_terminated_length": 570.6943969726562, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 13.475801749271136, "grad_norm": 0.21865294873714447, "learning_rate": 1e-06, "loss": -0.0197, "num_tokens": 785295086.0, "reward": 0.6707589626312256, "reward_std": 0.10220808535814285, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1442 }, { "clip_ratio/high_max": 0.003610143525293097, "clip_ratio/high_mean": 0.000933545805310132, "clip_ratio/low_mean": 0.0006267401413424523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015602859421051107, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3064.0, "completions/mean_length": 886.2332763671875, "completions/mean_terminated_length": 527.8225708007812, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 13.485131195335278, "grad_norm": 0.5562987923622131, "learning_rate": 1e-06, "loss": -0.0486, "num_tokens": 785820327.0, "reward": 0.6651785969734192, "reward_std": 0.14458990097045898, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219160199165344, "step": 1443 }, { "clip_ratio/high_max": 0.0020020690317323897, "clip_ratio/high_mean": 0.0007748012221782119, "clip_ratio/low_mean": 0.0002964324219192349, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010712336443248205, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1797.0, "completions/mean_length": 962.5156860351562, "completions/mean_terminated_length": 528.5260009765625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 13.494460641399417, "grad_norm": 0.3047040104866028, "learning_rate": 1e-06, "loss": -0.0885, "num_tokens": 786324365.0, "reward": 0.7142857313156128, "reward_std": 0.17130544781684875, "rewards/verify_math_reward/mean": 0.7142857313156128, "rewards/verify_math_reward/std": 0.4520062506198883, "step": 1444 }, { "clip_ratio/high_max": 0.0011822478027170291, "clip_ratio/high_mean": 0.00035488656817506126, "clip_ratio/low_mean": 0.0002599489271233324, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006148354896140518, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2685.0, "completions/mean_length": 945.638427734375, "completions/mean_terminated_length": 545.4037475585938, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 13.503790087463557, "grad_norm": 0.23118868470191956, "learning_rate": 1e-06, "loss": -0.0322, "num_tokens": 786848905.0, "reward": 0.645089328289032, "reward_std": 0.10517682880163193, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1445 }, { "clip_ratio/high_max": 0.0020006159065815154, "clip_ratio/high_mean": 0.0007312193247344112, "clip_ratio/low_mean": 0.0004801676841452718, "clip_ratio/low_min": 2.8682881747954525e-05, "clip_ratio/region_mean": 0.0012113870034227148, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3201.0, "completions/mean_length": 925.33935546875, "completions/mean_terminated_length": 558.1270141601562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.513119533527696, "grad_norm": 0.2648131251335144, "learning_rate": 1e-06, "loss": -0.0608, "num_tokens": 787380945.0, "reward": 0.6316964626312256, "reward_std": 0.13523778319358826, "rewards/verify_math_reward/mean": 0.6316964030265808, "rewards/verify_math_reward/std": 0.4826137125492096, "step": 1446 }, { "clip_ratio/high_max": 0.0020254668124835007, "clip_ratio/high_mean": 0.0006712548993164091, "clip_ratio/low_mean": 0.0005241719172772719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001195426801132271, "completions/clipped_ratio": 0.1294642857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2691.0, "completions/mean_length": 992.0904541015625, "completions/mean_terminated_length": 530.4833374023438, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.522448979591836, "grad_norm": 0.4312402307987213, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 787896666.0, "reward": 0.6160714626312256, "reward_std": 0.1471807062625885, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 1447 }, { "clip_ratio/high_max": 0.0014023078638274455, "clip_ratio/high_mean": 0.0004600453930834192, "clip_ratio/low_mean": 0.0003318258569606769, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007918712526588934, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2888.0, "completions/mean_length": 903.5982666015625, "completions/mean_terminated_length": 516.0350341796875, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 13.531778425655977, "grad_norm": 0.22489960491657257, "learning_rate": 1e-06, "loss": -0.0313, "num_tokens": 788389978.0, "reward": 0.6774553656578064, "reward_std": 0.10633868724107742, "rewards/verify_math_reward/mean": 0.6774553656578064, "rewards/verify_math_reward/std": 0.4677111804485321, "step": 1448 }, { "clip_ratio/high_max": 0.0013727660625590943, "clip_ratio/high_mean": 0.0005316767715157766, "clip_ratio/low_mean": 0.00035899911131309636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008906758848752361, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3403.0, "completions/mean_length": 772.3114013671875, "completions/mean_terminated_length": 525.2266235351562, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 13.541107871720117, "grad_norm": 0.2135280966758728, "learning_rate": 1e-06, "loss": -0.0341, "num_tokens": 788917697.0, "reward": 0.7187500596046448, "reward_std": 0.1278284639120102, "rewards/verify_math_reward/mean": 0.71875, "rewards/verify_math_reward/std": 0.4498603343963623, "step": 1449 }, { "clip_ratio/high_max": 0.00202522271865746, "clip_ratio/high_mean": 0.0006648302296525799, "clip_ratio/low_mean": 0.00037332416786739486, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010381543943367433, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2742.0, "completions/mean_length": 945.4933471679688, "completions/mean_terminated_length": 504.58270263671875, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 13.550437317784256, "grad_norm": 0.45835256576538086, "learning_rate": 1e-06, "loss": -0.0482, "num_tokens": 789414787.0, "reward": 0.6383928656578064, "reward_std": 0.13846825063228607, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341694831848, "step": 1450 }, { "clip_ratio/high_max": 0.001945218289620243, "clip_ratio/high_mean": 0.0008428751607425511, "clip_ratio/low_mean": 0.00034109882290067617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011839740018331213, "completions/clipped_ratio": 0.0948660714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3608.0, "completions/mean_length": 900.0379638671875, "completions/mean_terminated_length": 565.07275390625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 13.559766763848396, "grad_norm": 0.25809982419013977, "learning_rate": 1e-06, "loss": -0.0665, "num_tokens": 789953325.0, "reward": 0.699776828289032, "reward_std": 0.1710016429424286, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1451 }, { "clip_ratio/high_max": 0.002498020290659042, "clip_ratio/high_mean": 0.0008431602927885251, "clip_ratio/low_mean": 0.0004155889710091287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012587492637976538, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3509.0, "completions/mean_length": 999.2645263671875, "completions/mean_terminated_length": 534.1578979492188, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.569096209912537, "grad_norm": 0.3384162187576294, "learning_rate": 1e-06, "loss": -0.0269, "num_tokens": 790452666.0, "reward": 0.65625, "reward_std": 0.13760244846343994, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1452 }, { "clip_ratio/high_max": 0.0018312933134438936, "clip_ratio/high_mean": 0.000621519347987487, "clip_ratio/low_mean": 0.00027285034411761444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008943696666392498, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2930.0, "completions/mean_length": 882.2277221679688, "completions/mean_terminated_length": 501.06866455078125, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 13.578425655976677, "grad_norm": 0.24517937004566193, "learning_rate": 1e-06, "loss": -0.0516, "num_tokens": 790940270.0, "reward": 0.6930803656578064, "reward_std": 0.12678678333759308, "rewards/verify_math_reward/mean": 0.6930803656578064, "rewards/verify_math_reward/std": 0.46147337555885315, "step": 1453 }, { "clip_ratio/high_max": 0.0017484512136434205, "clip_ratio/high_mean": 0.0005834791468259937, "clip_ratio/low_mean": 0.0004431280758581124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00102660721859138, "completions/clipped_ratio": 0.1127232142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 1966.0, "completions/mean_length": 914.5904541015625, "completions/mean_terminated_length": 510.41131591796875, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 13.587755102040816, "grad_norm": 0.2504347562789917, "learning_rate": 1e-06, "loss": -0.0558, "num_tokens": 791439559.0, "reward": 0.652901828289032, "reward_std": 0.12851063907146454, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1454 }, { "clip_ratio/high_max": 0.0018982372566824779, "clip_ratio/high_mean": 0.0007249323934956919, "clip_ratio/low_mean": 0.000293872105430637, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001018804519844707, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3704.0, "completions/mean_length": 965.7522583007812, "completions/mean_terminated_length": 581.3358154296875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 13.597084548104956, "grad_norm": 0.3169480860233307, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 791999737.0, "reward": 0.6428571939468384, "reward_std": 0.11948806047439575, "rewards/verify_math_reward/mean": 0.6428571343421936, "rewards/verify_math_reward/std": 0.47942501306533813, "step": 1455 }, { "clip_ratio/high_max": 0.002013426914345473, "clip_ratio/high_mean": 0.000531723276253615, "clip_ratio/low_mean": 0.00028123971742388676, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008129629859467968, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3913.0, "completions/mean_length": 1059.10498046875, "completions/mean_terminated_length": 589.48193359375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 13.606413994169095, "grad_norm": 0.1967323124408722, "learning_rate": 1e-06, "loss": -0.0476, "num_tokens": 792553551.0, "reward": 0.6462053656578064, "reward_std": 0.1210271567106247, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1456 }, { "clip_ratio/high_max": 0.001914564665639773, "clip_ratio/high_mean": 0.000788834149716422, "clip_ratio/low_mean": 0.0003496232891393447, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011384574318071827, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3653.0, "completions/mean_length": 993.4688110351562, "completions/mean_terminated_length": 545.7215576171875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 13.615743440233237, "grad_norm": 0.4443165957927704, "learning_rate": 1e-06, "loss": -0.0684, "num_tokens": 793067347.0, "reward": 0.6629464626312256, "reward_std": 0.14812570810317993, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1457 }, { "clip_ratio/high_max": 0.001701616663922323, "clip_ratio/high_mean": 0.0005171892580619897, "clip_ratio/low_mean": 0.00031081757128959, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008280068286694586, "completions/clipped_ratio": 0.0703125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2139.0, "completions/mean_length": 739.0424194335938, "completions/mean_terminated_length": 485.15484619140625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 13.625072886297376, "grad_norm": 0.421511709690094, "learning_rate": 1e-06, "loss": -0.0526, "num_tokens": 793560577.0, "reward": 0.6897321939468384, "reward_std": 0.09897467494010925, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1458 }, { "clip_ratio/high_max": 0.0023946032124513295, "clip_ratio/high_mean": 0.0009514205348750693, "clip_ratio/low_mean": 0.00043876720155822113, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001390187750075711, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2651.0, "completions/mean_length": 1038.01904296875, "completions/mean_terminated_length": 556.0116577148438, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 13.634402332361516, "grad_norm": 0.29964977502822876, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 794086762.0, "reward": 0.6004464626312256, "reward_std": 0.16183707118034363, "rewards/verify_math_reward/mean": 0.6004464030265808, "rewards/verify_math_reward/std": 0.49008017778396606, "step": 1459 }, { "clip_ratio/high_max": 0.0018743944528978318, "clip_ratio/high_mean": 0.0007847478591429535, "clip_ratio/low_mean": 0.0004296823426557239, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001214430209074635, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2594.0, "completions/mean_length": 983.6105346679688, "completions/mean_terminated_length": 592.6067504882812, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 13.643731778425655, "grad_norm": 0.25965142250061035, "learning_rate": 1e-06, "loss": -0.052, "num_tokens": 794653301.0, "reward": 0.6674107313156128, "reward_std": 0.15736766159534454, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1460 }, { "clip_ratio/high_max": 0.0016970364158623852, "clip_ratio/high_mean": 0.0006360034976751194, "clip_ratio/low_mean": 0.0003294871667094412, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009654906461946666, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 1059.421875, "completions/mean_terminated_length": 589.847900390625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 13.653061224489797, "grad_norm": 0.2414640635251999, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 795191535.0, "reward": 0.6729910969734192, "reward_std": 0.13827574253082275, "rewards/verify_math_reward/mean": 0.6729910969734192, "rewards/verify_math_reward/std": 0.46938255429267883, "step": 1461 }, { "clip_ratio/high_max": 0.002116244228091091, "clip_ratio/high_mean": 0.0006606608640140621, "clip_ratio/low_mean": 0.00031311290831581573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000973773785517551, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 919.14404296875, "completions/mean_terminated_length": 524.5281982421875, "completions/min_length": 161.0, "completions/min_terminated_length": 161.0, "epoch": 13.662390670553936, "grad_norm": 0.26867973804473877, "learning_rate": 1e-06, "loss": -0.0586, "num_tokens": 795700912.0, "reward": 0.6863839626312256, "reward_std": 0.12050652503967285, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 1462 }, { "clip_ratio/high_max": 0.00176824001755449, "clip_ratio/high_mean": 0.0006499588116639643, "clip_ratio/low_mean": 0.0004712876520898135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011212464669370092, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 1105.4765625, "completions/mean_terminated_length": 574.9631958007812, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 13.671720116618076, "grad_norm": 0.23908397555351257, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 796225899.0, "reward": 0.6205357313156128, "reward_std": 0.146812304854393, "rewards/verify_math_reward/mean": 0.6205357313156128, "rewards/verify_math_reward/std": 0.4855247139930725, "step": 1463 }, { "clip_ratio/high_max": 0.0017086016478060628, "clip_ratio/high_mean": 0.0006271425013437693, "clip_ratio/low_mean": 0.0004496029623624054, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010767454623419326, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3657.0, "completions/mean_length": 1058.10498046875, "completions/mean_terminated_length": 542.5352783203125, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 13.681049562682215, "grad_norm": 0.25820058584213257, "learning_rate": 1e-06, "loss": -0.0462, "num_tokens": 796726169.0, "reward": 0.5691964626312256, "reward_std": 0.13493607938289642, "rewards/verify_math_reward/mean": 0.5691964030265808, "rewards/verify_math_reward/std": 0.4954652488231659, "step": 1464 }, { "clip_ratio/high_max": 0.0024719912617001683, "clip_ratio/high_mean": 0.0008474741189274937, "clip_ratio/low_mean": 0.0005479869960254291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013954611058579758, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3713.0, "completions/mean_length": 865.1517944335938, "completions/mean_terminated_length": 539.6854858398438, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 13.690379008746355, "grad_norm": 0.30524924397468567, "learning_rate": 1e-06, "loss": -0.0546, "num_tokens": 797261913.0, "reward": 0.6752232313156128, "reward_std": 0.17325612902641296, "rewards/verify_math_reward/mean": 0.6752232313156128, "rewards/verify_math_reward/std": 0.46855294704437256, "step": 1465 }, { "clip_ratio/high_max": 0.0018566695907793473, "clip_ratio/high_mean": 0.0006630695525018382, "clip_ratio/low_mean": 0.0004151960770286678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010782656318042427, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3872.0, "completions/mean_length": 880.1272583007812, "completions/mean_terminated_length": 529.8836669921875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 13.699708454810496, "grad_norm": 0.3109317719936371, "learning_rate": 1e-06, "loss": -0.0249, "num_tokens": 797768427.0, "reward": 0.6852678656578064, "reward_std": 0.14602065086364746, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1466 }, { "clip_ratio/high_max": 0.0019457281050563324, "clip_ratio/high_mean": 0.000726408077753149, "clip_ratio/low_mean": 0.00034779537486429035, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010742034355644137, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3743.0, "completions/mean_length": 953.927490234375, "completions/mean_terminated_length": 572.474365234375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 13.709037900874636, "grad_norm": 0.29716986417770386, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 798309818.0, "reward": 0.6662946939468384, "reward_std": 0.14101991057395935, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1467 }, { "clip_ratio/high_max": 0.0018728079685388366, "clip_ratio/high_mean": 0.0005517528716154629, "clip_ratio/low_mean": 0.00037528320081037236, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009270360696973512, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2096.0, "completions/mean_length": 919.5792846679688, "completions/mean_terminated_length": 507.0050354003906, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 13.718367346938775, "grad_norm": 0.9936861991882324, "learning_rate": 1e-06, "loss": -0.0537, "num_tokens": 798813393.0, "reward": 0.6383928656578064, "reward_std": 0.10990727692842484, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1468 }, { "clip_ratio/high_max": 0.0024873462971299887, "clip_ratio/high_mean": 0.0008032383084355388, "clip_ratio/low_mean": 0.0005039778361606295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013072161236777902, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3358.0, "completions/mean_length": 1041.958740234375, "completions/mean_terminated_length": 614.54833984375, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 13.727696793002915, "grad_norm": 0.41095873713493347, "learning_rate": 1e-06, "loss": -0.039, "num_tokens": 799387036.0, "reward": 0.6339285969734192, "reward_std": 0.1579303741455078, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1469 }, { "clip_ratio/high_max": 0.0015492766797251534, "clip_ratio/high_mean": 0.0006564006334883743, "clip_ratio/low_mean": 0.0005405517995313858, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011969524166488554, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3884.0, "completions/mean_length": 922.7578735351562, "completions/mean_terminated_length": 550.8316650390625, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 13.737026239067056, "grad_norm": 0.30893468856811523, "learning_rate": 1e-06, "loss": -0.0447, "num_tokens": 799908515.0, "reward": 0.6964285969734192, "reward_std": 0.14489158987998962, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565433502197, "step": 1470 }, { "clip_ratio/high_max": 0.0026250950031680986, "clip_ratio/high_mean": 0.000864764811922214, "clip_ratio/low_mean": 0.0003525011343299411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001217265937157208, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3280.0, "completions/mean_length": 1033.6473388671875, "completions/mean_terminated_length": 527.9011840820312, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.746355685131196, "grad_norm": 5.366886138916016, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 800402807.0, "reward": 0.6808035969734192, "reward_std": 0.14913460612297058, "rewards/verify_math_reward/mean": 0.6808035969734192, "rewards/verify_math_reward/std": 0.4664256274700165, "step": 1471 }, { "clip_ratio/high_max": 0.0016081378271337599, "clip_ratio/high_mean": 0.0005036907032263116, "clip_ratio/low_mean": 0.0002505595759885182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007542502844444243, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3582.0, "completions/mean_length": 969.93310546875, "completions/mean_terminated_length": 545.9923706054688, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.755685131195335, "grad_norm": 0.2958771586418152, "learning_rate": 1e-06, "loss": -0.025, "num_tokens": 800932339.0, "reward": 0.6551339626312256, "reward_std": 0.10257647186517715, "rewards/verify_math_reward/mean": 0.6551339030265808, "rewards/verify_math_reward/std": 0.4755900502204895, "step": 1472 }, { "clip_ratio/high_max": 0.0016386216011596844, "clip_ratio/high_mean": 0.0006217188465598156, "clip_ratio/low_mean": 0.0003549203192960704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009766391649463912, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 1005.3795166015625, "completions/mean_terminated_length": 581.7918701171875, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 13.765014577259475, "grad_norm": 0.2535308003425598, "learning_rate": 1e-06, "loss": -0.0433, "num_tokens": 801474783.0, "reward": 0.640625, "reward_std": 0.13275183737277985, "rewards/verify_math_reward/mean": 0.640625, "rewards/verify_math_reward/std": 0.48008525371551514, "step": 1473 }, { "clip_ratio/high_max": 0.0018186428060289472, "clip_ratio/high_mean": 0.0005990914196445374, "clip_ratio/low_mean": 0.00041964480942624505, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010187362277065404, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3347.0, "completions/mean_length": 995.4129638671875, "completions/mean_terminated_length": 561.488525390625, "completions/min_length": 183.0, "completions/min_terminated_length": 183.0, "epoch": 13.774344023323614, "grad_norm": 0.39022114872932434, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 802014017.0, "reward": 0.6116071939468384, "reward_std": 0.15770283341407776, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.4876568913459778, "step": 1474 }, { "clip_ratio/high_max": 0.0014813031448284164, "clip_ratio/high_mean": 0.0005341916303223115, "clip_ratio/low_mean": 0.00038841452828819456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009226061210938497, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2462.0, "completions/mean_length": 1058.1373291015625, "completions/mean_terminated_length": 583.8387451171875, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 13.783673469387756, "grad_norm": 0.2332642674446106, "learning_rate": 1e-06, "loss": -0.0539, "num_tokens": 802560236.0, "reward": 0.6082589626312256, "reward_std": 0.12828311324119568, "rewards/verify_math_reward/mean": 0.6082589030265808, "rewards/verify_math_reward/std": 0.48841196298599243, "step": 1475 }, { "clip_ratio/high_max": 0.0019524216404533945, "clip_ratio/high_mean": 0.0007695368740314734, "clip_ratio/low_mean": 0.00037678508579119807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011463219507277245, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2047.0, "completions/mean_length": 801.8939819335938, "completions/mean_terminated_length": 500.9707946777344, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 13.793002915451895, "grad_norm": 0.5175551772117615, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 803068061.0, "reward": 0.7098214626312256, "reward_std": 0.14635835587978363, "rewards/verify_math_reward/mean": 0.7098214030265808, "rewards/verify_math_reward/std": 0.454098105430603, "step": 1476 }, { "clip_ratio/high_max": 0.001574541403897456, "clip_ratio/high_mean": 0.0005633783980556473, "clip_ratio/low_mean": 0.0003890172697538219, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009523956305201864, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3808.0, "completions/mean_length": 1013.9006958007812, "completions/mean_terminated_length": 582.5635986328125, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 13.802332361516035, "grad_norm": 0.24361242353916168, "learning_rate": 1e-06, "loss": -0.0465, "num_tokens": 803621268.0, "reward": 0.613839328289032, "reward_std": 0.13673411309719086, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1477 }, { "clip_ratio/high_max": 0.0019728389888769016, "clip_ratio/high_mean": 0.0006432071022572927, "clip_ratio/low_mean": 0.000295518333587097, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009387254485773155, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3161.0, "completions/mean_length": 962.2879638671875, "completions/mean_terminated_length": 550.7904052734375, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 13.811661807580174, "grad_norm": 0.257691890001297, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 804135934.0, "reward": 0.6785714626312256, "reward_std": 0.12106994539499283, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1478 }, { "clip_ratio/high_max": 0.0021615647201542743, "clip_ratio/high_mean": 0.0008199662770493887, "clip_ratio/low_mean": 0.0003965479390899418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012165141852165107, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 1740.0, "completions/mean_length": 1011.2366333007812, "completions/mean_terminated_length": 579.5267333984375, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 13.820991253644316, "grad_norm": 5.130914688110352, "learning_rate": 1e-06, "loss": -0.044, "num_tokens": 804678978.0, "reward": 0.6261160969734192, "reward_std": 0.15631458163261414, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1479 }, { "clip_ratio/high_max": 0.00164507688532467, "clip_ratio/high_mean": 0.0005966942408122122, "clip_ratio/low_mean": 0.00026963624486597837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008663305070513161, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2364.0, "completions/mean_length": 956.6652221679688, "completions/mean_terminated_length": 512.7592163085938, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 13.830320699708455, "grad_norm": 0.19591157138347626, "learning_rate": 1e-06, "loss": -0.0542, "num_tokens": 805165902.0, "reward": 0.6975446939468384, "reward_std": 0.132416233420372, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 1480 }, { "clip_ratio/high_max": 0.0014007445561219356, "clip_ratio/high_mean": 0.000502886847471018, "clip_ratio/low_mean": 0.0003058118484204897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008086986745183822, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3935.0, "completions/mean_length": 904.5100708007812, "completions/mean_terminated_length": 548.1401977539062, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 13.839650145772595, "grad_norm": 0.3516015112400055, "learning_rate": 1e-06, "loss": -0.0217, "num_tokens": 805690495.0, "reward": 0.7042410969734192, "reward_std": 0.0993858352303505, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1481 }, { "clip_ratio/high_max": 0.002013493052800186, "clip_ratio/high_mean": 0.0006001068650220986, "clip_ratio/low_mean": 0.0003182639659371489, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009183708280033898, "completions/clipped_ratio": 0.1964285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 1311.4888916015625, "completions/mean_terminated_length": 630.83056640625, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 13.848979591836734, "grad_norm": 0.35657399892807007, "learning_rate": 1e-06, "loss": -0.0466, "num_tokens": 806244477.0, "reward": 0.543526828289032, "reward_std": 0.11347699910402298, "rewards/verify_math_reward/mean": 0.5435267686843872, "rewards/verify_math_reward/std": 0.49838000535964966, "step": 1482 }, { "clip_ratio/high_max": 0.0021818411951244343, "clip_ratio/high_mean": 0.0006567358777829213, "clip_ratio/low_mean": 0.0006275751775319804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012843110416724812, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3680.0, "completions/mean_length": 1001.90185546875, "completions/mean_terminated_length": 555.3716430664062, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 13.858309037900874, "grad_norm": 0.5369406342506409, "learning_rate": 1e-06, "loss": -0.0458, "num_tokens": 806772429.0, "reward": 0.6261160969734192, "reward_std": 0.15285293757915497, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1483 }, { "clip_ratio/high_max": 0.0016363545873900875, "clip_ratio/high_mean": 0.0005250055328360759, "clip_ratio/low_mean": 0.000411384296057804, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009363898097944912, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3075.0, "completions/mean_length": 908.2433471679688, "completions/mean_terminated_length": 525.7124633789062, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 13.867638483965015, "grad_norm": 1.6832938194274902, "learning_rate": 1e-06, "loss": -0.0408, "num_tokens": 807282567.0, "reward": 0.6640625, "reward_std": 0.12215510755777359, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1484 }, { "clip_ratio/high_max": 0.002052319818176329, "clip_ratio/high_mean": 0.0008283975366794039, "clip_ratio/low_mean": 0.0003622072408688837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011906047984666657, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3848.0, "completions/mean_length": 976.26123046875, "completions/mean_terminated_length": 539.656494140625, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 13.876967930029155, "grad_norm": 0.24110916256904602, "learning_rate": 1e-06, "loss": -0.0974, "num_tokens": 807788137.0, "reward": 0.6662946939468384, "reward_std": 0.15680311620235443, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1485 }, { "clip_ratio/high_max": 0.0017715245303406846, "clip_ratio/high_mean": 0.0005689511453965679, "clip_ratio/low_mean": 0.0002710913105374857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008400424503633985, "completions/clipped_ratio": 0.1551339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2613.0, "completions/mean_length": 1114.954345703125, "completions/mean_terminated_length": 567.5759887695312, "completions/min_length": 123.0, "completions/min_terminated_length": 123.0, "epoch": 13.886297376093294, "grad_norm": 0.23268258571624756, "learning_rate": 1e-06, "loss": -0.0636, "num_tokens": 808304704.0, "reward": 0.6015625, "reward_std": 0.11674319207668304, "rewards/verify_math_reward/mean": 0.6015625, "rewards/verify_math_reward/std": 0.48984986543655396, "step": 1486 }, { "clip_ratio/high_max": 0.0015273608551069628, "clip_ratio/high_mean": 0.0005877606245121569, "clip_ratio/low_mean": 0.0004931669473080547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010809276063810103, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3066.0, "completions/mean_length": 937.0725708007812, "completions/mean_terminated_length": 522.263916015625, "completions/min_length": 146.0, "completions/min_terminated_length": 146.0, "epoch": 13.895626822157434, "grad_norm": 0.2541758716106415, "learning_rate": 1e-06, "loss": -0.0398, "num_tokens": 808807705.0, "reward": 0.6718750596046448, "reward_std": 0.12569162249565125, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1487 }, { "clip_ratio/high_max": 0.0018329851263843011, "clip_ratio/high_mean": 0.00056038483944576, "clip_ratio/low_mean": 0.00028137042909293086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008417552562605124, "completions/clipped_ratio": 0.0881696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 847.9230346679688, "completions/mean_terminated_length": 533.8494873046875, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 13.904956268221575, "grad_norm": 0.20590364933013916, "learning_rate": 1e-06, "loss": -0.037, "num_tokens": 809321596.0, "reward": 0.6886160969734192, "reward_std": 0.11039513349533081, "rewards/verify_math_reward/mean": 0.6886160969734192, "rewards/verify_math_reward/std": 0.46331802010536194, "step": 1488 }, { "clip_ratio/high_max": 0.00229115974434535, "clip_ratio/high_mean": 0.000901408802747028, "clip_ratio/low_mean": 0.00042819570444407873, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001329604521743022, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2226.0, "completions/mean_length": 902.654052734375, "completions/mean_terminated_length": 519.4525146484375, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 13.914285714285715, "grad_norm": 0.36295849084854126, "learning_rate": 1e-06, "loss": -0.0635, "num_tokens": 809832070.0, "reward": 0.6741071939468384, "reward_std": 0.15139050781726837, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1489 }, { "clip_ratio/high_max": 0.002087779728753958, "clip_ratio/high_mean": 0.0007416142852889607, "clip_ratio/low_mean": 0.0004565545327750442, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011981688330706675, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2305.0, "completions/mean_length": 932.2199096679688, "completions/mean_terminated_length": 503.1647644042969, "completions/min_length": 152.0, "completions/min_terminated_length": 152.0, "epoch": 13.923615160349854, "grad_norm": 0.3551120460033417, "learning_rate": 1e-06, "loss": -0.0909, "num_tokens": 810317467.0, "reward": 0.660714328289032, "reward_std": 0.1403031200170517, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1490 }, { "clip_ratio/high_max": 0.0013714795095438603, "clip_ratio/high_mean": 0.00045341820987232495, "clip_ratio/low_mean": 0.0003700997604028089, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008235179739131127, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4037.0, "completions/mean_length": 973.0803833007812, "completions/mean_terminated_length": 580.7537841796875, "completions/min_length": 200.0, "completions/min_terminated_length": 200.0, "epoch": 13.932944606413994, "grad_norm": 0.29760563373565674, "learning_rate": 1e-06, "loss": -0.0529, "num_tokens": 810866995.0, "reward": 0.613839328289032, "reward_std": 0.13057151436805725, "rewards/verify_math_reward/mean": 0.6138392686843872, "rewards/verify_math_reward/std": 0.48714008927345276, "step": 1491 }, { "clip_ratio/high_max": 0.001929035919602029, "clip_ratio/high_mean": 0.0006863545404485194, "clip_ratio/low_mean": 0.00037580204616460833, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010621565670589916, "completions/clipped_ratio": 0.1450892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3876.0, "completions/mean_length": 1046.118408203125, "completions/mean_terminated_length": 528.5143432617188, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.942274052478133, "grad_norm": 0.24002155661582947, "learning_rate": 1e-06, "loss": -0.054, "num_tokens": 811361005.0, "reward": 0.6071428656578064, "reward_std": 0.12767621874809265, "rewards/verify_math_reward/mean": 0.6071428656578064, "rewards/verify_math_reward/std": 0.48865827918052673, "step": 1492 }, { "clip_ratio/high_max": 0.0014654858168796636, "clip_ratio/high_mean": 0.0005337569509720197, "clip_ratio/low_mean": 0.00028353541347314604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000817292378997081, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 818.2433471679688, "completions/mean_terminated_length": 505.69439697265625, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 13.951603498542275, "grad_norm": 0.23312684893608093, "learning_rate": 1e-06, "loss": -0.0397, "num_tokens": 811861967.0, "reward": 0.6941964626312256, "reward_std": 0.11945484578609467, "rewards/verify_math_reward/mean": 0.6941964030265808, "rewards/verify_math_reward/std": 0.4610042870044708, "step": 1493 }, { "clip_ratio/high_max": 0.002324598935956601, "clip_ratio/high_mean": 0.0007541353406850249, "clip_ratio/low_mean": 0.00045425827102008043, "clip_ratio/low_min": 1.3031693015363999e-05, "clip_ratio/region_mean": 0.0012083935907867271, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3613.0, "completions/mean_length": 1113.310302734375, "completions/mean_terminated_length": 616.1953125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 13.960932944606414, "grad_norm": 0.3092258870601654, "learning_rate": 1e-06, "loss": -0.0832, "num_tokens": 812431469.0, "reward": 0.6261160969734192, "reward_std": 0.14755865931510925, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1494 }, { "clip_ratio/high_max": 0.0015647552099835593, "clip_ratio/high_mean": 0.0005115541280247271, "clip_ratio/low_mean": 0.00038354821617758716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008951023701229133, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3918.0, "completions/mean_length": 1059.239990234375, "completions/mean_terminated_length": 562.3155517578125, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 13.970262390670554, "grad_norm": 0.27657631039619446, "learning_rate": 1e-06, "loss": -0.0487, "num_tokens": 812945540.0, "reward": 0.5926339626312256, "reward_std": 0.11997589468955994, "rewards/verify_math_reward/mean": 0.5926339030265808, "rewards/verify_math_reward/std": 0.49161845445632935, "step": 1495 }, { "clip_ratio/high_max": 0.0020840627112193033, "clip_ratio/high_mean": 0.0008205575250030961, "clip_ratio/low_mean": 0.00040433476669932134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012248922867001966, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2438.0, "completions/mean_length": 896.6016235351562, "completions/mean_terminated_length": 503.6929931640625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 13.979591836734693, "grad_norm": 0.2688261568546295, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 813432687.0, "reward": 0.6640625, "reward_std": 0.1401950567960739, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1496 }, { "clip_ratio/high_max": 0.0015964990416250657, "clip_ratio/high_mean": 0.000543468352589116, "clip_ratio/low_mean": 0.0004935717452099198, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001037040121445898, "completions/clipped_ratio": 0.1462053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3648.0, "completions/mean_length": 1108.9989013671875, "completions/mean_terminated_length": 597.4993896484375, "completions/min_length": 171.0, "completions/min_terminated_length": 171.0, "epoch": 13.988921282798835, "grad_norm": 0.21441441774368286, "learning_rate": 1e-06, "loss": -0.0758, "num_tokens": 813982822.0, "reward": 0.5915178656578064, "reward_std": 0.13985693454742432, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1497 }, { "clip_ratio/high_max": 0.0017150953171949368, "clip_ratio/high_mean": 0.0006848354114481481, "clip_ratio/low_mean": 0.0002689252323762048, "clip_ratio/low_min": 1.4240145901567303e-05, "clip_ratio/region_mean": 0.0009537606310914271, "completions/clipped_ratio": 0.15909090909090906, "completions/max_length": 4096.0, "completions/max_terminated_length": 2030.0, "completions/mean_length": 1102.0909423828125, "completions/mean_terminated_length": 535.6756591796875, "completions/min_length": 190.0, "completions/min_terminated_length": 190.0, "epoch": 13.998250728862974, "grad_norm": 0.239691361784935, "learning_rate": 1e-06, "loss": -0.0797, "num_tokens": 814499339.0, "reward": 0.6662946939468384, "reward_std": 0.13357990980148315, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179925441741943, "step": 1498 }, { "clip_ratio/high_max": 0.001494333037044271, "clip_ratio/high_mean": 0.000578145759391191, "clip_ratio/low_mean": 0.0002765736257970275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008547193610866088, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2152.0, "completions/mean_length": 1018.07373046875, "completions/mean_terminated_length": 542.1056518554688, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 14.00932944606414, "grad_norm": 1.4641883373260498, "learning_rate": 1e-06, "loss": -0.0358, "num_tokens": 815007925.0, "reward": 0.6573660969734192, "reward_std": 0.11637480556964874, "rewards/verify_math_reward/mean": 0.6573660969734192, "rewards/verify_math_reward/std": 0.47485533356666565, "step": 1499 }, { "clip_ratio/high_max": 0.0020549969995045103, "clip_ratio/high_mean": 0.0006765435027773492, "clip_ratio/low_mean": 0.0003070677812502254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000983611276751617, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2758.0, "completions/mean_length": 870.6529541015625, "completions/mean_terminated_length": 541.3739013671875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 14.018658892128279, "grad_norm": 0.24957065284252167, "learning_rate": 1e-06, "loss": -0.0646, "num_tokens": 815543750.0, "reward": 0.7410714626312256, "reward_std": 0.14895054697990417, "rewards/verify_math_reward/mean": 0.7410714030265808, "rewards/verify_math_reward/std": 0.43829095363616943, "step": 1500 }, { "clip_ratio/high_max": 0.001968701009900542, "clip_ratio/high_mean": 0.000634106368124776, "clip_ratio/low_mean": 0.0005157212635822361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011498276089696446, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3604.0, "completions/mean_length": 1011.075927734375, "completions/mean_terminated_length": 556.8297119140625, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 14.02798833819242, "grad_norm": 0.44304928183555603, "learning_rate": 1e-06, "loss": -0.0619, "num_tokens": 816077162.0, "reward": 0.6037946939468384, "reward_std": 0.129141166806221, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1501 }, { "clip_ratio/high_max": 0.0018333724947297014, "clip_ratio/high_mean": 0.0007765739246679004, "clip_ratio/low_mean": 0.0004834348496842722, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012600087884493405, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2947.0, "completions/mean_length": 930.888427734375, "completions/mean_terminated_length": 515.2677001953125, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 14.03731778425656, "grad_norm": 0.36276882886886597, "learning_rate": 1e-06, "loss": -0.0471, "num_tokens": 816566382.0, "reward": 0.6674107313156128, "reward_std": 0.1360626220703125, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1502 }, { "clip_ratio/high_max": 0.002207079916843213, "clip_ratio/high_mean": 0.000813964232293074, "clip_ratio/low_mean": 0.0004805520211448311, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001294516277994262, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2487.0, "completions/mean_length": 910.5357666015625, "completions/mean_terminated_length": 554.8386840820312, "completions/min_length": 182.0, "completions/min_terminated_length": 182.0, "epoch": 14.0466472303207, "grad_norm": 0.26648738980293274, "learning_rate": 1e-06, "loss": -0.0634, "num_tokens": 817103662.0, "reward": 0.6741071939468384, "reward_std": 0.16484086215496063, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1503 }, { "clip_ratio/high_max": 0.001593323813722236, "clip_ratio/high_mean": 0.000571819620745373, "clip_ratio/low_mean": 0.0004090447291673627, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000980864360826672, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3443.0, "completions/mean_length": 897.1406860351562, "completions/mean_terminated_length": 526.6624755859375, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 14.055976676384839, "grad_norm": 0.28196415305137634, "learning_rate": 1e-06, "loss": -0.0329, "num_tokens": 817611732.0, "reward": 0.65625, "reward_std": 0.13932742178440094, "rewards/verify_math_reward/mean": 0.65625, "rewards/verify_math_reward/std": 0.4752241373062134, "step": 1504 }, { "clip_ratio/high_max": 0.0013122446544002742, "clip_ratio/high_mean": 0.0003747532045963453, "clip_ratio/low_mean": 0.0003058970505662728, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006806502588005969, "completions/clipped_ratio": 0.1037946428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3077.0, "completions/mean_length": 932.4967041015625, "completions/mean_terminated_length": 566.11328125, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 14.06530612244898, "grad_norm": 0.20008648931980133, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 818153761.0, "reward": 0.6517857313156128, "reward_std": 0.11794712394475937, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1505 }, { "clip_ratio/high_max": 0.0018449998533469625, "clip_ratio/high_mean": 0.0005978632216283586, "clip_ratio/low_mean": 0.0003785571439038904, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009764203714439645, "completions/clipped_ratio": 0.1383928571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 1013.8638916015625, "completions/mean_terminated_length": 518.8056640625, "completions/min_length": 175.0, "completions/min_terminated_length": 175.0, "epoch": 14.07463556851312, "grad_norm": 0.24399596452713013, "learning_rate": 1e-06, "loss": -0.0437, "num_tokens": 818649047.0, "reward": 0.6540178656578064, "reward_std": 0.11899950355291367, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1506 }, { "clip_ratio/high_max": 0.0029248369501146954, "clip_ratio/high_mean": 0.0008199370113288751, "clip_ratio/low_mean": 0.00039066135104803834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001210598351462977, "completions/clipped_ratio": 0.1372767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3702.0, "completions/mean_length": 1035.27685546875, "completions/mean_terminated_length": 548.2535400390625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.08396501457726, "grad_norm": 0.4364930987358093, "learning_rate": 1e-06, "loss": -0.0452, "num_tokens": 819161023.0, "reward": 0.6517857313156128, "reward_std": 0.14365628361701965, "rewards/verify_math_reward/mean": 0.6517857313156128, "rewards/verify_math_reward/std": 0.47667041420936584, "step": 1507 }, { "clip_ratio/high_max": 0.0018456281832186505, "clip_ratio/high_mean": 0.0006877476062072674, "clip_ratio/low_mean": 0.0004106035758013604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010983512038365006, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2231.0, "completions/mean_length": 963.0558471679688, "completions/mean_terminated_length": 515.4923095703125, "completions/min_length": 170.0, "completions/min_terminated_length": 170.0, "epoch": 14.093294460641399, "grad_norm": 0.3371732831001282, "learning_rate": 1e-06, "loss": -0.0783, "num_tokens": 819653073.0, "reward": 0.6819196939468384, "reward_std": 0.15458819270133972, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1508 }, { "clip_ratio/high_max": 0.0016413013872806914, "clip_ratio/high_mean": 0.0005821581821692234, "clip_ratio/low_mean": 0.00026635393624019343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000848512137963553, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3787.0, "completions/mean_length": 992.4241333007812, "completions/mean_terminated_length": 535.4315185546875, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 14.102623906705539, "grad_norm": 0.2613990008831024, "learning_rate": 1e-06, "loss": -0.0325, "num_tokens": 820162677.0, "reward": 0.6986607313156128, "reward_std": 0.11419376730918884, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1509 }, { "clip_ratio/high_max": 0.00206036433155532, "clip_ratio/high_mean": 0.0007325881288124947, "clip_ratio/low_mean": 0.00027539572738533025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001007983857562067, "completions/clipped_ratio": 0.0926339285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3854.0, "completions/mean_length": 857.6506958007812, "completions/mean_terminated_length": 527.0442504882812, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 14.11195335276968, "grad_norm": 0.20512865483760834, "learning_rate": 1e-06, "loss": -0.0719, "num_tokens": 820673396.0, "reward": 0.731026828289032, "reward_std": 0.13568215072155, "rewards/verify_math_reward/mean": 0.7310267686843872, "rewards/verify_math_reward/std": 0.44367367029190063, "step": 1510 }, { "clip_ratio/high_max": 0.0014188510867825244, "clip_ratio/high_mean": 0.0004800527813131339, "clip_ratio/low_mean": 0.00029871640936107724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007787692102283472, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3358.0, "completions/mean_length": 1049.77685546875, "completions/mean_terminated_length": 578.7113037109375, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 14.12128279883382, "grad_norm": 0.24495892226696014, "learning_rate": 1e-06, "loss": -0.0478, "num_tokens": 821206028.0, "reward": 0.6361607313156128, "reward_std": 0.1256481409072876, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1511 }, { "clip_ratio/high_max": 0.0010677577156457119, "clip_ratio/high_mean": 0.00040600342754260055, "clip_ratio/low_mean": 0.00016483723129567807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0005708406652047415, "completions/clipped_ratio": 0.0691964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3004.0, "completions/mean_length": 754.8917846679688, "completions/mean_terminated_length": 506.5119934082031, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 14.130612244897959, "grad_norm": 0.18473725020885468, "learning_rate": 1e-06, "loss": -0.0379, "num_tokens": 821708779.0, "reward": 0.7533482313156128, "reward_std": 0.09130503982305527, "rewards/verify_math_reward/mean": 0.7533482313156128, "rewards/verify_math_reward/std": 0.4313030242919922, "step": 1512 }, { "clip_ratio/high_max": 0.0018346720971749164, "clip_ratio/high_mean": 0.0006204481560416752, "clip_ratio/low_mean": 0.0003428258760322933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009632740475353785, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3574.0, "completions/mean_length": 925.114990234375, "completions/mean_terminated_length": 553.465087890625, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 14.139941690962099, "grad_norm": 1.1437463760375977, "learning_rate": 1e-06, "loss": -0.0509, "num_tokens": 822246506.0, "reward": 0.6194196939468384, "reward_std": 0.13083434104919434, "rewards/verify_math_reward/mean": 0.6194196343421936, "rewards/verify_math_reward/std": 0.48580074310302734, "step": 1513 }, { "clip_ratio/high_max": 0.0012576879689731868, "clip_ratio/high_mean": 0.000406599920552253, "clip_ratio/low_mean": 0.00036734900686496985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007739489374216646, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3198.0, "completions/mean_length": 1022.2254638671875, "completions/mean_terminated_length": 560.5673828125, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.14927113702624, "grad_norm": 0.848977267742157, "learning_rate": 1e-06, "loss": -0.0438, "num_tokens": 822777356.0, "reward": 0.6462053656578064, "reward_std": 0.10246770083904266, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1514 }, { "clip_ratio/high_max": 0.0016075218627520371, "clip_ratio/high_mean": 0.0005811413732317305, "clip_ratio/low_mean": 0.00028787522683160205, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008690165977895958, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 1043.935302734375, "completions/mean_terminated_length": 581.0256958007812, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 14.15860058309038, "grad_norm": 0.2685943841934204, "learning_rate": 1e-06, "loss": -0.0506, "num_tokens": 823319834.0, "reward": 0.6506696939468384, "reward_std": 0.10882211476564407, "rewards/verify_math_reward/mean": 0.6506696343421936, "rewards/verify_math_reward/std": 0.47702476382255554, "step": 1515 }, { "clip_ratio/high_max": 0.0016328601450368296, "clip_ratio/high_mean": 0.0006034293946868274, "clip_ratio/low_mean": 0.000265215342551528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008686447181389667, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3138.0, "completions/mean_length": 951.2277221679688, "completions/mean_terminated_length": 515.6746826171875, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 14.167930029154519, "grad_norm": 0.2632819414138794, "learning_rate": 1e-06, "loss": -0.0863, "num_tokens": 823814934.0, "reward": 0.7042410969734192, "reward_std": 0.12944427132606506, "rewards/verify_math_reward/mean": 0.7042410969734192, "rewards/verify_math_reward/std": 0.45663803815841675, "step": 1516 }, { "clip_ratio/high_max": 0.002156028051103931, "clip_ratio/high_mean": 0.000844777683596476, "clip_ratio/low_mean": 0.0005361340854506125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013809117663186044, "completions/clipped_ratio": 0.1138392857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3776.0, "completions/mean_length": 911.6819458007812, "completions/mean_terminated_length": 502.61334228515625, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 14.177259475218658, "grad_norm": 0.31470048427581787, "learning_rate": 1e-06, "loss": -0.0537, "num_tokens": 824302201.0, "reward": 0.6863839626312256, "reward_std": 0.17589321732521057, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 1517 }, { "clip_ratio/high_max": 0.0016641835100017488, "clip_ratio/high_mean": 0.0005968749319436029, "clip_ratio/low_mean": 0.00023741816278288752, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008342931150764343, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2608.0, "completions/mean_length": 977.8035888671875, "completions/mean_terminated_length": 572.7919311523438, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 14.186588921282798, "grad_norm": 0.2105596363544464, "learning_rate": 1e-06, "loss": -0.0698, "num_tokens": 824841865.0, "reward": 0.637276828289032, "reward_std": 0.12313193827867508, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1518 }, { "clip_ratio/high_max": 0.0015158499372773804, "clip_ratio/high_mean": 0.0005213018552012727, "clip_ratio/low_mean": 0.0002866446910729792, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008079465351329418, "completions/clipped_ratio": 0.1283482142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3851.0, "completions/mean_length": 979.4576416015625, "completions/mean_terminated_length": 520.5557250976562, "completions/min_length": 134.0, "completions/min_terminated_length": 134.0, "epoch": 14.19591836734694, "grad_norm": 0.2224515825510025, "learning_rate": 1e-06, "loss": -0.032, "num_tokens": 825341411.0, "reward": 0.6852678656578064, "reward_std": 0.10618714243173599, "rewards/verify_math_reward/mean": 0.6852678656578064, "rewards/verify_math_reward/std": 0.46466848254203796, "step": 1519 }, { "clip_ratio/high_max": 0.002123734593624249, "clip_ratio/high_mean": 0.0008263798099505948, "clip_ratio/low_mean": 0.0006075028886698419, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001433882691344479, "completions/clipped_ratio": 0.0814732142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3952.0, "completions/mean_length": 803.4475708007812, "completions/mean_terminated_length": 511.3985595703125, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.205247813411079, "grad_norm": 0.5233650207519531, "learning_rate": 1e-06, "loss": -0.0407, "num_tokens": 825851604.0, "reward": 0.7087053656578064, "reward_std": 0.15526330471038818, "rewards/verify_math_reward/mean": 0.7087053656578064, "rewards/verify_math_reward/std": 0.45461276173591614, "step": 1520 }, { "clip_ratio/high_max": 0.001779746904503554, "clip_ratio/high_mean": 0.0006358881364576519, "clip_ratio/low_mean": 0.0006412241928046569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012771123547281604, "completions/clipped_ratio": 0.1205357142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3425.0, "completions/mean_length": 954.9464721679688, "completions/mean_terminated_length": 524.4467163085938, "completions/min_length": 160.0, "completions/min_terminated_length": 160.0, "epoch": 14.214577259475218, "grad_norm": 0.5685335993766785, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 826350204.0, "reward": 0.6785714626312256, "reward_std": 0.13023632764816284, "rewards/verify_math_reward/mean": 0.6785714030265808, "rewards/verify_math_reward/std": 0.46728572249412537, "step": 1521 }, { "clip_ratio/high_max": 0.0014518308926199097, "clip_ratio/high_mean": 0.000579665770601423, "clip_ratio/low_mean": 0.0005093620625302719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001089027819034527, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3067.0, "completions/mean_length": 1039.0614013671875, "completions/mean_terminated_length": 561.7845458984375, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 14.223906705539358, "grad_norm": 0.2760242521762848, "learning_rate": 1e-06, "loss": -0.0753, "num_tokens": 826867123.0, "reward": 0.6540178656578064, "reward_std": 0.15052077174186707, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1522 }, { "clip_ratio/high_max": 0.0016179232188733295, "clip_ratio/high_mean": 0.0005964905258224462, "clip_ratio/low_mean": 0.00032461810633321875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009211086289724335, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2828.0, "completions/mean_length": 1017.26904296875, "completions/mean_terminated_length": 581.9324951171875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 14.2332361516035, "grad_norm": 0.22143647074699402, "learning_rate": 1e-06, "loss": -0.0716, "num_tokens": 827412236.0, "reward": 0.625, "reward_std": 0.1583433598279953, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1523 }, { "clip_ratio/high_max": 0.001661519267145195, "clip_ratio/high_mean": 0.0005118761273479322, "clip_ratio/low_mean": 0.0004245043710398022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009363805038447026, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3926.0, "completions/mean_length": 846.8303833007812, "completions/mean_terminated_length": 550.01220703125, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 14.242565597667639, "grad_norm": 0.21743392944335938, "learning_rate": 1e-06, "loss": -0.0106, "num_tokens": 827952252.0, "reward": 0.6640625, "reward_std": 0.1083681657910347, "rewards/verify_math_reward/mean": 0.6640625, "rewards/verify_math_reward/std": 0.4725809693336487, "step": 1524 }, { "clip_ratio/high_max": 0.0018410912598483264, "clip_ratio/high_mean": 0.0005218886806233058, "clip_ratio/low_mean": 0.000281309810361563, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008031984616536647, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 951.0078735351562, "completions/mean_terminated_length": 524.5006103515625, "completions/min_length": 136.0, "completions/min_terminated_length": 136.0, "epoch": 14.251895043731778, "grad_norm": 0.8745452761650085, "learning_rate": 1e-06, "loss": -0.0363, "num_tokens": 828466963.0, "reward": 0.652901828289032, "reward_std": 0.10952933132648468, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631317377090454, "step": 1525 }, { "clip_ratio/high_max": 0.001884348279418191, "clip_ratio/high_mean": 0.0006042873546903138, "clip_ratio/low_mean": 0.0005702761377506249, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001174563483800739, "completions/clipped_ratio": 0.1227678571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3868.0, "completions/mean_length": 968.3058471679688, "completions/mean_terminated_length": 530.5877685546875, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 14.261224489795918, "grad_norm": 0.30108314752578735, "learning_rate": 1e-06, "loss": -0.0776, "num_tokens": 828974245.0, "reward": 0.645089328289032, "reward_std": 0.11885752528905869, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1526 }, { "clip_ratio/high_max": 0.002004976682655979, "clip_ratio/high_mean": 0.0006427386269933777, "clip_ratio/low_mean": 0.0003822580783889862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010249966981064063, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 2648.0, "completions/mean_length": 954.4944458007812, "completions/mean_terminated_length": 537.4804077148438, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 14.270553935860057, "grad_norm": 0.3322337567806244, "learning_rate": 1e-06, "loss": -0.0681, "num_tokens": 829497208.0, "reward": 0.6707589626312256, "reward_std": 0.12467173486948013, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1527 }, { "clip_ratio/high_max": 0.0026705336204031482, "clip_ratio/high_mean": 0.0007938176222523907, "clip_ratio/low_mean": 0.0004538355865406629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012476531974243699, "completions/clipped_ratio": 0.1584821428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2441.0, "completions/mean_length": 1123.77685546875, "completions/mean_terminated_length": 564.021240234375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 14.279883381924199, "grad_norm": 0.6157507300376892, "learning_rate": 1e-06, "loss": -0.0707, "num_tokens": 830015192.0, "reward": 0.5770089626312256, "reward_std": 0.14815708994865417, "rewards/verify_math_reward/mean": 0.5770089030265808, "rewards/verify_math_reward/std": 0.4943099319934845, "step": 1528 }, { "clip_ratio/high_max": 0.0017188321180583443, "clip_ratio/high_mean": 0.0007196113292593509, "clip_ratio/low_mean": 0.0004686434240284143, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011882547878485639, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2788.0, "completions/mean_length": 1075.4051513671875, "completions/mean_terminated_length": 576.5552368164062, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 14.289212827988338, "grad_norm": 0.2834455966949463, "learning_rate": 1e-06, "loss": -0.0597, "num_tokens": 830539283.0, "reward": 0.5993303656578064, "reward_std": 0.15988357365131378, "rewards/verify_math_reward/mean": 0.5993303656578064, "rewards/verify_math_reward/std": 0.49030786752700806, "step": 1529 }, { "clip_ratio/high_max": 0.0013908015898778103, "clip_ratio/high_mean": 0.00041138293636322487, "clip_ratio/low_mean": 0.00022284483452494896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006342277683870634, "completions/clipped_ratio": 0.0837053571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 796.5491333007812, "completions/mean_terminated_length": 495.1376647949219, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 14.298542274052478, "grad_norm": 0.23000116646289825, "learning_rate": 1e-06, "loss": -0.0612, "num_tokens": 831018359.0, "reward": 0.7611607313156128, "reward_std": 0.08781202137470245, "rewards/verify_math_reward/mean": 0.7611607313156128, "rewards/verify_math_reward/std": 0.4266124963760376, "step": 1530 }, { "clip_ratio/high_max": 0.0016838195297168568, "clip_ratio/high_mean": 0.0005107820786633965, "clip_ratio/low_mean": 0.0003008752792084124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008116573462757515, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3761.0, "completions/mean_length": 952.9676513671875, "completions/mean_terminated_length": 562.5533447265625, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 14.307871720116617, "grad_norm": 0.2157803773880005, "learning_rate": 1e-06, "loss": -0.0428, "num_tokens": 831554218.0, "reward": 0.6975446939468384, "reward_std": 0.12978056073188782, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 1531 }, { "clip_ratio/high_max": 0.002110756959154969, "clip_ratio/high_mean": 0.0007731584628345445, "clip_ratio/low_mean": 0.00021015188758610748, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009833103358687367, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2834.0, "completions/mean_length": 948.55810546875, "completions/mean_terminated_length": 562.0300903320312, "completions/min_length": 149.0, "completions/min_terminated_length": 149.0, "epoch": 14.317201166180759, "grad_norm": 0.21734938025474548, "learning_rate": 1e-06, "loss": -0.0595, "num_tokens": 832096062.0, "reward": 0.6618303656578064, "reward_std": 0.12816546857357025, "rewards/verify_math_reward/mean": 0.6618303656578064, "rewards/verify_math_reward/std": 0.4733508229255676, "step": 1532 }, { "clip_ratio/high_max": 0.0017332326679024845, "clip_ratio/high_mean": 0.0006725101638949127, "clip_ratio/low_mean": 0.0002929731977019401, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009654833429522114, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2972.0, "completions/mean_length": 923.9710083007812, "completions/mean_terminated_length": 525.474853515625, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 14.326530612244898, "grad_norm": 0.2405531257390976, "learning_rate": 1e-06, "loss": -0.0781, "num_tokens": 832594404.0, "reward": 0.625, "reward_std": 0.14897871017456055, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1533 }, { "clip_ratio/high_max": 0.00227256025391398, "clip_ratio/high_mean": 0.0008926759655878413, "clip_ratio/low_mean": 0.0004002686164312763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012929445874760859, "completions/clipped_ratio": 0.1305803571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3894.0, "completions/mean_length": 1032.454345703125, "completions/mean_terminated_length": 572.33251953125, "completions/min_length": 166.0, "completions/min_terminated_length": 166.0, "epoch": 14.335860058309038, "grad_norm": 1.404003381729126, "learning_rate": 1e-06, "loss": -0.0594, "num_tokens": 833128595.0, "reward": 0.652901828289032, "reward_std": 0.17442122101783752, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1534 }, { "clip_ratio/high_max": 0.0018078499851981178, "clip_ratio/high_mean": 0.0006283736802288331, "clip_ratio/low_mean": 0.0003628180102168699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009911917004501447, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2368.0, "completions/mean_length": 932.7120971679688, "completions/mean_terminated_length": 553.1174926757812, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 14.345189504373177, "grad_norm": 0.2802864909172058, "learning_rate": 1e-06, "loss": -0.0855, "num_tokens": 833662209.0, "reward": 0.6495535969734192, "reward_std": 0.16059784591197968, "rewards/verify_math_reward/mean": 0.6495535969734192, "rewards/verify_math_reward/std": 0.477376252412796, "step": 1535 }, { "clip_ratio/high_max": 0.0019803850263997447, "clip_ratio/high_mean": 0.0006916289194123237, "clip_ratio/low_mean": 0.00033670690902454226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010283358460583258, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2288.0, "completions/mean_length": 849.2433471679688, "completions/mean_terminated_length": 539.650390625, "completions/min_length": 141.0, "completions/min_terminated_length": 141.0, "epoch": 14.354518950437317, "grad_norm": 0.2729128897190094, "learning_rate": 1e-06, "loss": -0.0445, "num_tokens": 834186579.0, "reward": 0.6830357313156128, "reward_std": 0.13174405694007874, "rewards/verify_math_reward/mean": 0.6830357313156128, "rewards/verify_math_reward/std": 0.46555325388908386, "step": 1536 }, { "clip_ratio/high_max": 0.001609412760444684, "clip_ratio/high_mean": 0.0005967626675555948, "clip_ratio/low_mean": 0.0005037324272052501, "clip_ratio/low_min": 1.6344141840818338e-05, "clip_ratio/region_mean": 0.001100495079299435, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3476.0, "completions/mean_length": 940.35498046875, "completions/mean_terminated_length": 570.4912719726562, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 14.363848396501458, "grad_norm": 0.3091679811477661, "learning_rate": 1e-06, "loss": -0.0222, "num_tokens": 834743345.0, "reward": 0.5870535969734192, "reward_std": 0.13891583681106567, "rewards/verify_math_reward/mean": 0.5870535969734192, "rewards/verify_math_reward/std": 0.49263834953308105, "step": 1537 }, { "clip_ratio/high_max": 0.0018131489341612905, "clip_ratio/high_mean": 0.0007738125659670914, "clip_ratio/low_mean": 0.0003779981457228132, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011518107094161678, "completions/clipped_ratio": 0.1261160714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3420.0, "completions/mean_length": 987.7020263671875, "completions/mean_terminated_length": 539.1226196289062, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 14.373177842565598, "grad_norm": 0.28053566813468933, "learning_rate": 1e-06, "loss": -0.0514, "num_tokens": 835245926.0, "reward": 0.6305803656578064, "reward_std": 0.1388830542564392, "rewards/verify_math_reward/mean": 0.6305803656578064, "rewards/verify_math_reward/std": 0.4829172194004059, "step": 1538 }, { "clip_ratio/high_max": 0.001976877418201184, "clip_ratio/high_mean": 0.0006994004324951675, "clip_ratio/low_mean": 0.00029225382195363636, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009916542330756783, "completions/clipped_ratio": 0.1071428571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2877.0, "completions/mean_length": 930.2422485351562, "completions/mean_terminated_length": 550.3512573242188, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 14.382507288629737, "grad_norm": 0.38584429025650024, "learning_rate": 1e-06, "loss": -0.0378, "num_tokens": 835770783.0, "reward": 0.660714328289032, "reward_std": 0.12816476821899414, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1539 }, { "clip_ratio/high_max": 0.0015576084151689429, "clip_ratio/high_mean": 0.000549391243566788, "clip_ratio/low_mean": 0.0003806230924965348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009300143301516073, "completions/clipped_ratio": 0.0747767857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3619.0, "completions/mean_length": 777.021240234375, "completions/mean_terminated_length": 508.78045654296875, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 14.391836734693877, "grad_norm": 0.31385764479637146, "learning_rate": 1e-06, "loss": -0.0272, "num_tokens": 836275162.0, "reward": 0.7198660969734192, "reward_std": 0.12253489345312119, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931527972221375, "step": 1540 }, { "clip_ratio/high_max": 0.0019661447731778026, "clip_ratio/high_mean": 0.0006889410569783649, "clip_ratio/low_mean": 0.000354509037151729, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010434501182317035, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2722.0, "completions/mean_length": 970.3248291015625, "completions/mean_terminated_length": 599.6141967773438, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 14.401166180758018, "grad_norm": 0.2678011953830719, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 836850405.0, "reward": 0.6473214626312256, "reward_std": 0.13831712305545807, "rewards/verify_math_reward/mean": 0.6473214030265808, "rewards/verify_math_reward/std": 0.47807058691978455, "step": 1541 }, { "clip_ratio/high_max": 0.001748717335431138, "clip_ratio/high_mean": 0.0006184371759445639, "clip_ratio/low_mean": 0.0003229705503144942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009414077449036995, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2646.0, "completions/mean_length": 934.1004638671875, "completions/mean_terminated_length": 550.2402954101562, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 14.410495626822158, "grad_norm": 0.2840591371059418, "learning_rate": 1e-06, "loss": -0.0255, "num_tokens": 837370727.0, "reward": 0.6964285969734192, "reward_std": 0.11851094663143158, "rewards/verify_math_reward/mean": 0.6964285969734192, "rewards/verify_math_reward/std": 0.4600565731525421, "step": 1542 }, { "clip_ratio/high_max": 0.0015848389230086468, "clip_ratio/high_mean": 0.0005479890296555823, "clip_ratio/low_mean": 0.0002825919219731077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008305809478770243, "completions/clipped_ratio": 0.0915178571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3835.0, "completions/mean_length": 899.1016235351562, "completions/mean_terminated_length": 577.0552978515625, "completions/min_length": 156.0, "completions/min_terminated_length": 156.0, "epoch": 14.419825072886297, "grad_norm": 0.26387879252433777, "learning_rate": 1e-06, "loss": -0.0298, "num_tokens": 837922114.0, "reward": 0.6975446939468384, "reward_std": 0.10990910232067108, "rewards/verify_math_reward/mean": 0.6975446343421936, "rewards/verify_math_reward/std": 0.45957788825035095, "step": 1543 }, { "clip_ratio/high_max": 0.001600129977305187, "clip_ratio/high_mean": 0.00046474543796648504, "clip_ratio/low_mean": 0.0003207011232007062, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007854465457057813, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3104.0, "completions/mean_length": 967.3359985351562, "completions/mean_terminated_length": 524.9388427734375, "completions/min_length": 130.0, "completions/min_terminated_length": 130.0, "epoch": 14.429154518950437, "grad_norm": 0.3005709946155548, "learning_rate": 1e-06, "loss": -0.0492, "num_tokens": 838436319.0, "reward": 0.6629464626312256, "reward_std": 0.12456366419792175, "rewards/verify_math_reward/mean": 0.6629464030265808, "rewards/verify_math_reward/std": 0.47296738624572754, "step": 1544 }, { "clip_ratio/high_max": 0.0013656958890351234, "clip_ratio/high_mean": 0.0003816203889073222, "clip_ratio/low_mean": 0.00028702770123345545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006686480865027988, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3195.0, "completions/mean_length": 1053.65625, "completions/mean_terminated_length": 574.1137084960938, "completions/min_length": 148.0, "completions/min_terminated_length": 148.0, "epoch": 14.438483965014576, "grad_norm": 0.28173989057540894, "learning_rate": 1e-06, "loss": -0.0498, "num_tokens": 838969747.0, "reward": 0.652901828289032, "reward_std": 0.11419195681810379, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1545 }, { "clip_ratio/high_max": 0.0016461078048450872, "clip_ratio/high_mean": 0.0006027553740750591, "clip_ratio/low_mean": 0.0003256778613831557, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009284332354582148, "completions/clipped_ratio": 0.1428571428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 2469.0, "completions/mean_length": 1075.6640625, "completions/mean_terminated_length": 572.2747802734375, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 14.447813411078718, "grad_norm": 0.2972122132778168, "learning_rate": 1e-06, "loss": -0.0756, "num_tokens": 839492134.0, "reward": 0.6160714626312256, "reward_std": 0.13106118142604828, "rewards/verify_math_reward/mean": 0.6160714030265808, "rewards/verify_math_reward/std": 0.486612468957901, "step": 1546 }, { "clip_ratio/high_max": 0.002694419275940163, "clip_ratio/high_mean": 0.0006896465120007633, "clip_ratio/low_mean": 0.0005058231317889295, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001195469649246661, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3206.0, "completions/mean_length": 997.536865234375, "completions/mean_terminated_length": 527.5899658203125, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 14.457142857142857, "grad_norm": 0.37613818049430847, "learning_rate": 1e-06, "loss": -0.0639, "num_tokens": 839996671.0, "reward": 0.6261160969734192, "reward_std": 0.13403385877609253, "rewards/verify_math_reward/mean": 0.6261160969734192, "rewards/verify_math_reward/std": 0.48410359025001526, "step": 1547 }, { "clip_ratio/high_max": 0.0013512234763766173, "clip_ratio/high_mean": 0.00044560117385117337, "clip_ratio/low_mean": 0.00027892060859358025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007245217875606613, "completions/clipped_ratio": 0.1484375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3773.0, "completions/mean_length": 1115.359375, "completions/mean_terminated_length": 595.7981567382812, "completions/min_length": 167.0, "completions/min_terminated_length": 167.0, "epoch": 14.466472303206997, "grad_norm": 0.20257292687892914, "learning_rate": 1e-06, "loss": -0.0375, "num_tokens": 840553721.0, "reward": 0.5915178656578064, "reward_std": 0.10051559656858444, "rewards/verify_math_reward/mean": 0.5915178656578064, "rewards/verify_math_reward/std": 0.49182769656181335, "step": 1548 }, { "clip_ratio/high_max": 0.0017015369103319244, "clip_ratio/high_mean": 0.0006325729314085038, "clip_ratio/low_mean": 0.0003980436658821418, "clip_ratio/low_min": 1.1647409337456338e-05, "clip_ratio/region_mean": 0.001030616597745393, "completions/clipped_ratio": 0.1149553571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 966.771240234375, "completions/mean_terminated_length": 560.3265991210938, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 14.475801749271136, "grad_norm": 0.30500295758247375, "learning_rate": 1e-06, "loss": -0.0822, "num_tokens": 841079340.0, "reward": 0.676339328289032, "reward_std": 0.12448951601982117, "rewards/verify_math_reward/mean": 0.6763392686843872, "rewards/verify_math_reward/std": 0.4681335687637329, "step": 1549 }, { "clip_ratio/high_max": 0.0016326721852237824, "clip_ratio/high_mean": 0.0006306517716438975, "clip_ratio/low_mean": 0.00029594478337457986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009265965600206982, "completions/clipped_ratio": 0.1015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3088.0, "completions/mean_length": 899.1473388671875, "completions/mean_terminated_length": 537.7639770507812, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 14.485131195335278, "grad_norm": 0.2316146343946457, "learning_rate": 1e-06, "loss": -0.0439, "num_tokens": 841611320.0, "reward": 0.6584821939468384, "reward_std": 0.13136427104473114, "rewards/verify_math_reward/mean": 0.6584821343421936, "rewards/verify_math_reward/std": 0.4744836091995239, "step": 1550 }, { "clip_ratio/high_max": 0.0016231729277933482, "clip_ratio/high_mean": 0.0005429292850749334, "clip_ratio/low_mean": 0.0005203179716772866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010632472622091882, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3682.0, "completions/mean_length": 896.49560546875, "completions/mean_terminated_length": 539.2307739257812, "completions/min_length": 176.0, "completions/min_terminated_length": 176.0, "epoch": 14.494460641399417, "grad_norm": 0.3035215735435486, "learning_rate": 1e-06, "loss": -0.0307, "num_tokens": 842131084.0, "reward": 0.6819196939468384, "reward_std": 0.1396312266588211, "rewards/verify_math_reward/mean": 0.6819196343421936, "rewards/verify_math_reward/std": 0.46599099040031433, "step": 1551 }, { "clip_ratio/high_max": 0.0016773203897173516, "clip_ratio/high_mean": 0.0005758293227700051, "clip_ratio/low_mean": 0.0006926721243871725, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012685014662565663, "completions/clipped_ratio": 0.1339285714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3566.0, "completions/mean_length": 1011.7388916015625, "completions/mean_terminated_length": 534.7911987304688, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 14.503790087463557, "grad_norm": 0.700936496257782, "learning_rate": 1e-06, "loss": -0.0243, "num_tokens": 842640098.0, "reward": 0.6383928656578064, "reward_std": 0.11389067769050598, "rewards/verify_math_reward/mean": 0.6383928656578064, "rewards/verify_math_reward/std": 0.4807341992855072, "step": 1552 }, { "clip_ratio/high_max": 0.0020729823008878157, "clip_ratio/high_mean": 0.0008595442141086096, "clip_ratio/low_mean": 0.0005430965966297663, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014026407916389871, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3245.0, "completions/mean_length": 936.2991333007812, "completions/mean_terminated_length": 552.7058715820312, "completions/min_length": 142.0, "completions/min_terminated_length": 142.0, "epoch": 14.513119533527696, "grad_norm": 0.33733874559402466, "learning_rate": 1e-06, "loss": -0.0455, "num_tokens": 843170374.0, "reward": 0.6718750596046448, "reward_std": 0.16773615777492523, "rewards/verify_math_reward/mean": 0.671875, "rewards/verify_math_reward/std": 0.46979284286499023, "step": 1553 }, { "clip_ratio/high_max": 0.00203073719603708, "clip_ratio/high_mean": 0.0006710498910251772, "clip_ratio/low_mean": 0.000389734203054104, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010607840995362494, "completions/clipped_ratio": 0.1395089285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3944.0, "completions/mean_length": 1030.282470703125, "completions/mean_terminated_length": 533.2464599609375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 14.522448979591836, "grad_norm": 0.2851661145687103, "learning_rate": 1e-06, "loss": -0.0968, "num_tokens": 843671971.0, "reward": 0.629464328289032, "reward_std": 0.15277667343616486, "rewards/verify_math_reward/mean": 0.6294642686843872, "rewards/verify_math_reward/std": 0.4832179844379425, "step": 1554 }, { "clip_ratio/high_max": 0.001452928758226335, "clip_ratio/high_mean": 0.00048307620136256446, "clip_ratio/low_mean": 0.00024061604199232534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007236922465381213, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3337.0, "completions/mean_length": 1010.6484985351562, "completions/mean_terminated_length": 583.3252563476562, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 14.531778425655977, "grad_norm": 0.21543121337890625, "learning_rate": 1e-06, "loss": -0.0196, "num_tokens": 844229576.0, "reward": 0.645089328289032, "reward_std": 0.10051559656858444, "rewards/verify_math_reward/mean": 0.6450892686843872, "rewards/verify_math_reward/std": 0.4787535071372986, "step": 1555 }, { "clip_ratio/high_max": 0.002144656846212456, "clip_ratio/high_mean": 0.0007379114322247915, "clip_ratio/low_mean": 0.0003275800419260122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010654914731276222, "completions/clipped_ratio": 0.140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4053.0, "completions/mean_length": 1059.665283203125, "completions/mean_terminated_length": 562.8103637695312, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 14.541107871720117, "grad_norm": 0.27838853001594543, "learning_rate": 1e-06, "loss": -0.0575, "num_tokens": 844758284.0, "reward": 0.6238839626312256, "reward_std": 0.13260169327259064, "rewards/verify_math_reward/mean": 0.6238839030265808, "rewards/verify_math_reward/std": 0.4846802353858948, "step": 1556 }, { "clip_ratio/high_max": 0.002209893886174541, "clip_ratio/high_mean": 0.0008417112112510949, "clip_ratio/low_mean": 0.00031508226584264776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011567934780032374, "completions/clipped_ratio": 0.0859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 844.6830444335938, "completions/mean_terminated_length": 539.003662109375, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 14.550437317784256, "grad_norm": 0.49434801936149597, "learning_rate": 1e-06, "loss": -0.0602, "num_tokens": 845286176.0, "reward": 0.7165178656578064, "reward_std": 0.16394072771072388, "rewards/verify_math_reward/mean": 0.7165178656578064, "rewards/verify_math_reward/std": 0.4509401023387909, "step": 1557 }, { "clip_ratio/high_max": 0.0014534040456055664, "clip_ratio/high_mean": 0.0005090693261990964, "clip_ratio/low_mean": 0.0002322719885796687, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007413413059111917, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2905.0, "completions/mean_length": 832.8873291015625, "completions/mean_terminated_length": 495.3238830566406, "completions/min_length": 140.0, "completions/min_terminated_length": 140.0, "epoch": 14.559766763848396, "grad_norm": 0.28550097346305847, "learning_rate": 1e-06, "loss": -0.0169, "num_tokens": 845767787.0, "reward": 0.7332589626312256, "reward_std": 0.09345327317714691, "rewards/verify_math_reward/mean": 0.7332589030265808, "rewards/verify_math_reward/std": 0.4425029158592224, "step": 1558 }, { "clip_ratio/high_max": 0.0013655441525770584, "clip_ratio/high_mean": 0.0004264852286723908, "clip_ratio/low_mean": 0.00022893789764566463, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006554231513291597, "completions/clipped_ratio": 0.1026785714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2688.0, "completions/mean_length": 881.8594360351562, "completions/mean_terminated_length": 514.0721435546875, "completions/min_length": 157.0, "completions/min_terminated_length": 157.0, "epoch": 14.569096209912537, "grad_norm": 0.29426658153533936, "learning_rate": 1e-06, "loss": -0.0422, "num_tokens": 846271461.0, "reward": 0.6741071939468384, "reward_std": 0.09517784416675568, "rewards/verify_math_reward/mean": 0.6741071343421936, "rewards/verify_math_reward/std": 0.4689692556858063, "step": 1559 }, { "clip_ratio/high_max": 0.0016492469512741081, "clip_ratio/high_mean": 0.000665633537209942, "clip_ratio/low_mean": 0.0003009655340520112, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009665990692155901, "completions/clipped_ratio": 0.1607142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3719.0, "completions/mean_length": 1140.915283203125, "completions/mean_terminated_length": 575.0478515625, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 14.578425655976677, "grad_norm": 0.34149521589279175, "learning_rate": 1e-06, "loss": -0.0457, "num_tokens": 846792065.0, "reward": 0.609375, "reward_std": 0.13809071481227875, "rewards/verify_math_reward/mean": 0.609375, "rewards/verify_math_reward/std": 0.48816296458244324, "step": 1560 }, { "clip_ratio/high_max": 0.0017487114273535553, "clip_ratio/high_mean": 0.0007000863224675413, "clip_ratio/low_mean": 0.00026969055079462123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009697768582555, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3973.0, "completions/mean_length": 926.3739013671875, "completions/mean_terminated_length": 510.16033935546875, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 14.587755102040816, "grad_norm": 0.27039656043052673, "learning_rate": 1e-06, "loss": -0.0628, "num_tokens": 847283432.0, "reward": 0.6897321939468384, "reward_std": 0.12497665733098984, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.4628615975379944, "step": 1561 }, { "clip_ratio/high_max": 0.0025974907548516057, "clip_ratio/high_mean": 0.0008866977495927131, "clip_ratio/low_mean": 0.0006239195990929147, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015106173705135006, "completions/clipped_ratio": 0.0870535714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2733.0, "completions/mean_length": 821.2689819335938, "completions/mean_terminated_length": 509.0085754394531, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 14.597084548104956, "grad_norm": 1.9152460098266602, "learning_rate": 1e-06, "loss": -0.0306, "num_tokens": 847793777.0, "reward": 0.6897321939468384, "reward_std": 0.1448495090007782, "rewards/verify_math_reward/mean": 0.6897321343421936, "rewards/verify_math_reward/std": 0.462861567735672, "step": 1562 }, { "clip_ratio/high_max": 0.001565180609759409, "clip_ratio/high_mean": 0.0005695019563063397, "clip_ratio/low_mean": 0.0002960622764476284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008655642213852843, "completions/clipped_ratio": 0.0982142857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3497.0, "completions/mean_length": 882.911865234375, "completions/mean_terminated_length": 532.9714965820312, "completions/min_length": 144.0, "completions/min_terminated_length": 144.0, "epoch": 14.606413994169095, "grad_norm": 0.19960281252861023, "learning_rate": 1e-06, "loss": -0.0578, "num_tokens": 848309738.0, "reward": 0.6986607313156128, "reward_std": 0.12181811034679413, "rewards/verify_math_reward/mean": 0.6986607313156128, "rewards/verify_math_reward/std": 0.4590960443019867, "step": 1563 }, { "clip_ratio/high_max": 0.002330915940547129, "clip_ratio/high_mean": 0.0007907459512352943, "clip_ratio/low_mean": 0.00045763651905872393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012483825157687534, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2865.0, "completions/mean_length": 1017.6417846679688, "completions/mean_terminated_length": 577.8762817382812, "completions/min_length": 162.0, "completions/min_terminated_length": 162.0, "epoch": 14.615743440233237, "grad_norm": 0.40501147508621216, "learning_rate": 1e-06, "loss": -0.0476, "num_tokens": 848852249.0, "reward": 0.652901828289032, "reward_std": 0.13057473301887512, "rewards/verify_math_reward/mean": 0.6529017686843872, "rewards/verify_math_reward/std": 0.47631320357322693, "step": 1564 }, { "clip_ratio/high_max": 0.001787970308214426, "clip_ratio/high_mean": 0.0006398654822987737, "clip_ratio/low_mean": 0.0003360553191669169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009759207823663019, "completions/clipped_ratio": 0.1160714285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3624.0, "completions/mean_length": 922.4810791015625, "completions/mean_terminated_length": 505.7563171386719, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 14.625072886297376, "grad_norm": 0.26958921551704407, "learning_rate": 1e-06, "loss": -0.0441, "num_tokens": 849341368.0, "reward": 0.684151828289032, "reward_std": 0.11881474405527115, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1565 }, { "clip_ratio/high_max": 0.0016191406866710167, "clip_ratio/high_mean": 0.0005101120368635748, "clip_ratio/low_mean": 0.00048234225255328056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000992454315564828, "completions/clipped_ratio": 0.1082589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2959.0, "completions/mean_length": 900.8560791015625, "completions/mean_terminated_length": 512.9599609375, "completions/min_length": 143.0, "completions/min_terminated_length": 143.0, "epoch": 14.634402332361516, "grad_norm": 0.2411060333251953, "learning_rate": 1e-06, "loss": -0.035, "num_tokens": 849836071.0, "reward": 0.6707589626312256, "reward_std": 0.11411890387535095, "rewards/verify_math_reward/mean": 0.6707589030265808, "rewards/verify_math_reward/std": 0.4702001214027405, "step": 1566 }, { "clip_ratio/high_max": 0.003277888612501556, "clip_ratio/high_mean": 0.0009978387661249144, "clip_ratio/low_mean": 0.0007292624004548998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0017271011383854784, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1854.0, "completions/mean_length": 798.4933471679688, "completions/mean_terminated_length": 501.637451171875, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 14.643731778425655, "grad_norm": 0.955542266368866, "learning_rate": 1e-06, "loss": -0.0263, "num_tokens": 850344489.0, "reward": 0.7131696939468384, "reward_std": 0.1295209378004074, "rewards/verify_math_reward/mean": 0.7131696343421936, "rewards/verify_math_reward/std": 0.4525342583656311, "step": 1567 }, { "clip_ratio/high_max": 0.001533329370431602, "clip_ratio/high_mean": 0.0005405441042967141, "clip_ratio/low_mean": 0.00026572002070679446, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008062641172728036, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3637.0, "completions/mean_length": 926.1060791015625, "completions/mean_terminated_length": 550.1510620117188, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 14.653061224489797, "grad_norm": 0.22720454633235931, "learning_rate": 1e-06, "loss": -0.0323, "num_tokens": 850867768.0, "reward": 0.6863839626312256, "reward_std": 0.10772737860679626, "rewards/verify_math_reward/mean": 0.6863839030265808, "rewards/verify_math_reward/std": 0.46422141790390015, "step": 1568 }, { "clip_ratio/high_max": 0.001610024191904813, "clip_ratio/high_mean": 0.0005536392818612512, "clip_ratio/low_mean": 0.0003219873933630879, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008756266615819186, "completions/clipped_ratio": 0.1316964285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4084.0, "completions/mean_length": 1040.7623291015625, "completions/mean_terminated_length": 577.3714599609375, "completions/min_length": 139.0, "completions/min_terminated_length": 139.0, "epoch": 14.662390670553936, "grad_norm": 0.26372191309928894, "learning_rate": 1e-06, "loss": -0.0812, "num_tokens": 851417939.0, "reward": 0.6116071939468384, "reward_std": 0.1159176379442215, "rewards/verify_math_reward/mean": 0.6116071343421936, "rewards/verify_math_reward/std": 0.48765692114830017, "step": 1569 }, { "clip_ratio/high_max": 0.001548176569485804, "clip_ratio/high_mean": 0.0005403070363172446, "clip_ratio/low_mean": 0.00031034092103254807, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008506479543939349, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3289.0, "completions/mean_length": 969.5904541015625, "completions/mean_terminated_length": 522.96044921875, "completions/min_length": 138.0, "completions/min_terminated_length": 138.0, "epoch": 14.671720116618076, "grad_norm": 0.5310737490653992, "learning_rate": 1e-06, "loss": -0.0419, "num_tokens": 851915020.0, "reward": 0.6651785969734192, "reward_std": 0.09281206876039505, "rewards/verify_math_reward/mean": 0.6651785969734192, "rewards/verify_math_reward/std": 0.47219157218933105, "step": 1570 }, { "clip_ratio/high_max": 0.0023266314528882504, "clip_ratio/high_mean": 0.001036266794471885, "clip_ratio/low_mean": 0.0004133927404836868, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014496595322270878, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3749.0, "completions/mean_length": 917.7969360351562, "completions/mean_terminated_length": 540.8564453125, "completions/min_length": 137.0, "completions/min_terminated_length": 137.0, "epoch": 14.681049562682215, "grad_norm": 0.41868698596954346, "learning_rate": 1e-06, "loss": -0.0491, "num_tokens": 852440534.0, "reward": 0.660714328289032, "reward_std": 0.16889871656894684, "rewards/verify_math_reward/mean": 0.6607142686843872, "rewards/verify_math_reward/std": 0.4737313687801361, "step": 1571 }, { "clip_ratio/high_max": 0.0022973095401539467, "clip_ratio/high_mean": 0.0007713541972407256, "clip_ratio/low_mean": 0.0002951213218693738, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010664755009202054, "completions/clipped_ratio": 0.1361607142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3171.0, "completions/mean_length": 1020.1428833007812, "completions/mean_terminated_length": 535.31787109375, "completions/min_length": 128.0, "completions/min_terminated_length": 128.0, "epoch": 14.690379008746355, "grad_norm": 0.4241335988044739, "learning_rate": 1e-06, "loss": -0.0658, "num_tokens": 852952366.0, "reward": 0.637276828289032, "reward_std": 0.11937998235225677, "rewards/verify_math_reward/mean": 0.6372767686843872, "rewards/verify_math_reward/std": 0.481054425239563, "step": 1572 }, { "clip_ratio/high_max": 0.0019253582649980672, "clip_ratio/high_mean": 0.0007438572683895472, "clip_ratio/low_mean": 0.0004454248191905208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011892820912180468, "completions/clipped_ratio": 0.1808035714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 2997.0, "completions/mean_length": 1219.53125, "completions/mean_terminated_length": 584.6702880859375, "completions/min_length": 135.0, "completions/min_terminated_length": 135.0, "epoch": 14.699708454810496, "grad_norm": 0.2726913392543793, "learning_rate": 1e-06, "loss": -0.0803, "num_tokens": 853472754.0, "reward": 0.535714328289032, "reward_std": 0.15312324464321136, "rewards/verify_math_reward/mean": 0.5357142686843872, "rewards/verify_math_reward/std": 0.4990014135837555, "step": 1573 }, { "clip_ratio/high_max": 0.001717471230222145, "clip_ratio/high_mean": 0.0005929701642344298, "clip_ratio/low_mean": 0.0002548115628542291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008477817191305803, "completions/clipped_ratio": 0.1049107142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 2517.0, "completions/mean_length": 876.1663208007812, "completions/mean_terminated_length": 498.7793273925781, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 14.709037900874636, "grad_norm": 0.2161475121974945, "learning_rate": 1e-06, "loss": -0.0499, "num_tokens": 853960511.0, "reward": 0.6908482313156128, "reward_std": 0.10092676430940628, "rewards/verify_math_reward/mean": 0.6908482313156128, "rewards/verify_math_reward/std": 0.46240198612213135, "step": 1574 }, { "clip_ratio/high_max": 0.0019312379954499193, "clip_ratio/high_mean": 0.0008385988039663061, "clip_ratio/low_mean": 0.0004536276237558923, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012922264049848309, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2167.0, "completions/mean_length": 878.0881958007812, "completions/mean_terminated_length": 518.7680053710938, "completions/min_length": 127.0, "completions/min_terminated_length": 127.0, "epoch": 14.718367346938775, "grad_norm": 0.28904110193252563, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 854460214.0, "reward": 0.684151828289032, "reward_std": 0.16522133350372314, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1575 }, { "clip_ratio/high_max": 0.002119021868566051, "clip_ratio/high_mean": 0.0007490015395887895, "clip_ratio/low_mean": 0.00047770162291271845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001226703174324939, "completions/clipped_ratio": 0.1116071428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3699.0, "completions/mean_length": 959.755615234375, "completions/mean_terminated_length": 565.7550048828125, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 14.727696793002915, "grad_norm": 0.5477799773216248, "learning_rate": 1e-06, "loss": -0.0265, "num_tokens": 855017243.0, "reward": 0.6183035969734192, "reward_std": 0.13463158905506134, "rewards/verify_math_reward/mean": 0.6183035969734192, "rewards/verify_math_reward/std": 0.4860740303993225, "step": 1576 }, { "clip_ratio/high_max": 0.0015941970887070056, "clip_ratio/high_mean": 0.0005644643433697638, "clip_ratio/low_mean": 0.00034719701125141, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009116613437072374, "completions/clipped_ratio": 0.1194196428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3709.0, "completions/mean_length": 939.9699096679688, "completions/mean_terminated_length": 511.9657897949219, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 14.737026239067056, "grad_norm": 0.2434639036655426, "learning_rate": 1e-06, "loss": -0.1015, "num_tokens": 855520512.0, "reward": 0.6875000596046448, "reward_std": 0.11666832119226456, "rewards/verify_math_reward/mean": 0.6875, "rewards/verify_math_reward/std": 0.4637712836265564, "step": 1577 }, { "clip_ratio/high_max": 0.0019176306414010469, "clip_ratio/high_mean": 0.0006858818605905981, "clip_ratio/low_mean": 0.00029949311669952294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009853749816102209, "completions/clipped_ratio": 0.0825892857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 2435.0, "completions/mean_length": 805.6864013671875, "completions/mean_terminated_length": 509.47808837890625, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 14.746355685131196, "grad_norm": 0.2507024109363556, "learning_rate": 1e-06, "loss": -0.0487, "num_tokens": 856019855.0, "reward": 0.7276785969734192, "reward_std": 0.14391450583934784, "rewards/verify_math_reward/mean": 0.7276785969734192, "rewards/verify_math_reward/std": 0.4454030692577362, "step": 1578 }, { "clip_ratio/high_max": 0.001521907644928433, "clip_ratio/high_mean": 0.0005420800125648384, "clip_ratio/low_mean": 0.0003341919791637338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000876272022651392, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 1684.0, "completions/mean_length": 843.622802734375, "completions/mean_terminated_length": 507.1699523925781, "completions/min_length": 155.0, "completions/min_terminated_length": 155.0, "epoch": 14.755685131195335, "grad_norm": 0.23318514227867126, "learning_rate": 1e-06, "loss": -0.0467, "num_tokens": 856526765.0, "reward": 0.699776828289032, "reward_std": 0.11814073473215103, "rewards/verify_math_reward/mean": 0.6997767686843872, "rewards/verify_math_reward/std": 0.4586109220981598, "step": 1579 }, { "clip_ratio/high_max": 0.0023186022008303553, "clip_ratio/high_mean": 0.0009736781994433841, "clip_ratio/low_mean": 0.0004227125818943023, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001396390800437075, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3698.0, "completions/mean_length": 962.54248046875, "completions/mean_terminated_length": 528.5565185546875, "completions/min_length": 165.0, "completions/min_terminated_length": 165.0, "epoch": 14.765014577259475, "grad_norm": 0.3683277368545532, "learning_rate": 1e-06, "loss": -0.0837, "num_tokens": 857033531.0, "reward": 0.7020089626312256, "reward_std": 0.1603735387325287, "rewards/verify_math_reward/mean": 0.7020089030265808, "rewards/verify_math_reward/std": 0.45763099193573, "step": 1580 }, { "clip_ratio/high_max": 0.0021947820932837203, "clip_ratio/high_mean": 0.0007659841521672206, "clip_ratio/low_mean": 0.00047243521657946985, "clip_ratio/low_min": 2.3088288799044676e-05, "clip_ratio/region_mean": 0.001238419357832754, "completions/clipped_ratio": 0.1171875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3981.0, "completions/mean_length": 998.21435546875, "completions/mean_terminated_length": 587.0037841796875, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 14.774344023323614, "grad_norm": 0.26309913396835327, "learning_rate": 1e-06, "loss": -0.0339, "num_tokens": 857599387.0, "reward": 0.59375, "reward_std": 0.14484810829162598, "rewards/verify_math_reward/mean": 0.59375, "rewards/verify_math_reward/std": 0.4914066195487976, "step": 1581 }, { "clip_ratio/high_max": 0.0015159290342126042, "clip_ratio/high_mean": 0.0004449128691703663, "clip_ratio/low_mean": 0.00022011106489117083, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006650239320151741, "completions/clipped_ratio": 0.109375, "completions/max_length": 4096.0, "completions/max_terminated_length": 2786.0, "completions/mean_length": 924.404052734375, "completions/mean_terminated_length": 534.9097900390625, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.783673469387756, "grad_norm": 0.22830374538898468, "learning_rate": 1e-06, "loss": -0.0226, "num_tokens": 858118613.0, "reward": 0.6104910969734192, "reward_std": 0.10585013777017593, "rewards/verify_math_reward/mean": 0.6104910969734192, "rewards/verify_math_reward/std": 0.48791128396987915, "step": 1582 }, { "clip_ratio/high_max": 0.0013959056595922448, "clip_ratio/high_mean": 0.0004791517267221934, "clip_ratio/low_mean": 0.00022531271088155336, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0007044644298730418, "completions/clipped_ratio": 0.1216517857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 1790.0, "completions/mean_length": 937.1038208007812, "completions/mean_terminated_length": 499.5946350097656, "completions/min_length": 174.0, "completions/min_terminated_length": 174.0, "epoch": 14.793002915451895, "grad_norm": 0.26100870966911316, "learning_rate": 1e-06, "loss": -0.036, "num_tokens": 858596634.0, "reward": 0.7198660969734192, "reward_std": 0.09923359751701355, "rewards/verify_math_reward/mean": 0.7198660969734192, "rewards/verify_math_reward/std": 0.44931527972221375, "step": 1583 }, { "clip_ratio/high_max": 0.002612857635540422, "clip_ratio/high_mean": 0.0008548300629627192, "clip_ratio/low_mean": 0.00039178794963845576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012466180451156106, "completions/clipped_ratio": 0.1517857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3931.0, "completions/mean_length": 1072.2757568359375, "completions/mean_terminated_length": 531.1881713867188, "completions/min_length": 164.0, "completions/min_terminated_length": 164.0, "epoch": 14.802332361516035, "grad_norm": 0.30906954407691956, "learning_rate": 1e-06, "loss": -0.086, "num_tokens": 859086321.0, "reward": 0.621651828289032, "reward_std": 0.14759187400341034, "rewards/verify_math_reward/mean": 0.6216517686843872, "rewards/verify_math_reward/std": 0.485245943069458, "step": 1584 }, { "clip_ratio/high_max": 0.002269463788252324, "clip_ratio/high_mean": 0.0009261393788619898, "clip_ratio/low_mean": 0.0004527127066467074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013788520627713297, "completions/clipped_ratio": 0.1707589285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2527.0, "completions/mean_length": 1170.0770263671875, "completions/mean_terminated_length": 567.5652465820312, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 14.811661807580174, "grad_norm": 0.2833002209663391, "learning_rate": 1e-06, "loss": -0.1193, "num_tokens": 859604014.0, "reward": 0.6127232313156128, "reward_std": 0.16645805537700653, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 1585 }, { "clip_ratio/high_max": 0.001749017865222413, "clip_ratio/high_mean": 0.0006315624268609099, "clip_ratio/low_mean": 0.00038260512474153074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001014167552057188, "completions/clipped_ratio": 0.125, "completions/max_length": 4096.0, "completions/max_terminated_length": 3774.0, "completions/mean_length": 987.0491333007812, "completions/mean_terminated_length": 542.9132690429688, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 14.820991253644316, "grad_norm": 0.7678061723709106, "learning_rate": 1e-06, "loss": -0.0335, "num_tokens": 860122314.0, "reward": 0.684151828289032, "reward_std": 0.12050653249025345, "rewards/verify_math_reward/mean": 0.6841517686843872, "rewards/verify_math_reward/std": 0.4651124179363251, "step": 1586 }, { "clip_ratio/high_max": 0.0014779198972973973, "clip_ratio/high_mean": 0.0005677122862834949, "clip_ratio/low_mean": 0.00037268871847118135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00094040101612336, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3414.0, "completions/mean_length": 1022.0201416015625, "completions/mean_terminated_length": 587.3554077148438, "completions/min_length": 129.0, "completions/min_terminated_length": 129.0, "epoch": 14.830320699708455, "grad_norm": 0.3296527862548828, "learning_rate": 1e-06, "loss": -0.0528, "num_tokens": 860668564.0, "reward": 0.6037946939468384, "reward_std": 0.15135657787322998, "rewards/verify_math_reward/mean": 0.6037946343421936, "rewards/verify_math_reward/std": 0.48938122391700745, "step": 1587 }, { "clip_ratio/high_max": 0.002380060795985628, "clip_ratio/high_mean": 0.0007494002784369513, "clip_ratio/low_mean": 0.0005925272116655833, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013419274910120293, "completions/clipped_ratio": 0.1060267857142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3152.0, "completions/mean_length": 887.763427734375, "completions/mean_terminated_length": 507.26092529296875, "completions/min_length": 145.0, "completions/min_terminated_length": 145.0, "epoch": 14.839650145772595, "grad_norm": 0.7693892121315002, "learning_rate": 1e-06, "loss": -0.045, "num_tokens": 861167424.0, "reward": 0.7053571939468384, "reward_std": 0.13398967683315277, "rewards/verify_math_reward/mean": 0.7053571343421936, "rewards/verify_math_reward/std": 0.45613667368888855, "step": 1588 }, { "clip_ratio/high_max": 0.0017301673724432476, "clip_ratio/high_mean": 0.0006455211268985295, "clip_ratio/low_mean": 0.00035498673514666734, "clip_ratio/low_min": 1.2913223145005759e-05, "clip_ratio/region_mean": 0.001000507858407218, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3771.0, "completions/mean_length": 1087.9129638671875, "completions/mean_terminated_length": 591.1287231445312, "completions/min_length": 132.0, "completions/min_terminated_length": 132.0, "epoch": 14.848979591836734, "grad_norm": 0.26049551367759705, "learning_rate": 1e-06, "loss": -0.0553, "num_tokens": 861710810.0, "reward": 0.590401828289032, "reward_std": 0.13940368592739105, "rewards/verify_math_reward/mean": 0.5904017686843872, "rewards/verify_math_reward/std": 0.49203425645828247, "step": 1589 }, { "clip_ratio/high_max": 0.0015018580361356726, "clip_ratio/high_mean": 0.00042192500177407055, "clip_ratio/low_mean": 0.0002517023790460371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006736273935530335, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2102.0, "completions/mean_length": 1017.771240234375, "completions/mean_terminated_length": 546.3307495117188, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 14.858309037900874, "grad_norm": 0.19188618659973145, "learning_rate": 1e-06, "loss": -0.0583, "num_tokens": 862228093.0, "reward": 0.6127232313156128, "reward_std": 0.1025739535689354, "rewards/verify_math_reward/mean": 0.6127232313156128, "rewards/verify_math_reward/std": 0.4873998463153839, "step": 1590 }, { "clip_ratio/high_max": 0.0018410165939712897, "clip_ratio/high_mean": 0.0006286225598159945, "clip_ratio/low_mean": 0.0005019946329412051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011306172164040618, "completions/clipped_ratio": 0.1238839285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2177.0, "completions/mean_length": 949.9855346679688, "completions/mean_terminated_length": 505.1350402832031, "completions/min_length": 150.0, "completions/min_terminated_length": 150.0, "epoch": 14.867638483965015, "grad_norm": 0.22820855677127838, "learning_rate": 1e-06, "loss": -0.0593, "num_tokens": 862712840.0, "reward": 0.6674107313156128, "reward_std": 0.1474849283695221, "rewards/verify_math_reward/mean": 0.6674107313156128, "rewards/verify_math_reward/std": 0.47140392661094666, "step": 1591 }, { "clip_ratio/high_max": 0.002091548369207885, "clip_ratio/high_mean": 0.0008278338173113298, "clip_ratio/low_mean": 0.0003703655561366759, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001198199337522965, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 3829.0, "completions/mean_length": 830.4263916015625, "completions/mean_terminated_length": 492.6083679199219, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 14.876967930029155, "grad_norm": 0.3046199083328247, "learning_rate": 1e-06, "loss": -0.068, "num_tokens": 863200670.0, "reward": 0.7366071939468384, "reward_std": 0.16104431450366974, "rewards/verify_math_reward/mean": 0.7366071343421936, "rewards/verify_math_reward/std": 0.44071969389915466, "step": 1592 }, { "clip_ratio/high_max": 0.0031508898828178644, "clip_ratio/high_mean": 0.0009709434707474429, "clip_ratio/low_mean": 0.00040443729994876776, "clip_ratio/low_min": 1.72986437974032e-05, "clip_ratio/region_mean": 0.0013753807579632849, "completions/clipped_ratio": 0.1417410714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3477.0, "completions/mean_length": 1046.149658203125, "completions/mean_terminated_length": 542.4681396484375, "completions/min_length": 147.0, "completions/min_terminated_length": 147.0, "epoch": 14.886297376093294, "grad_norm": 0.655681312084198, "learning_rate": 1e-06, "loss": -0.0493, "num_tokens": 863714060.0, "reward": 0.6462053656578064, "reward_std": 0.15871354937553406, "rewards/verify_math_reward/mean": 0.6462053656578064, "rewards/verify_math_reward/std": 0.478413462638855, "step": 1593 }, { "clip_ratio/high_max": 0.0019156091511831619, "clip_ratio/high_mean": 0.00046137945810187375, "clip_ratio/low_mean": 0.00014914218763806275, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0006105216471041786, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 2171.0, "completions/mean_length": 1073.6395263671875, "completions/mean_terminated_length": 490.0945129394531, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 14.895626822157434, "grad_norm": 0.23443014919757843, "learning_rate": 1e-06, "loss": -0.0352, "num_tokens": 864174401.0, "reward": 0.6361607313156128, "reward_std": 0.06891624629497528, "rewards/verify_math_reward/mean": 0.6361607313156128, "rewards/verify_math_reward/std": 0.4813718795776367, "step": 1594 }, { "clip_ratio/high_max": 0.0020316538721090183, "clip_ratio/high_mean": 0.0007326751583605073, "clip_ratio/low_mean": 0.0002692601053695398, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010019352775998414, "completions/clipped_ratio": 0.1350446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3215.0, "completions/mean_length": 1024.97216796875, "completions/mean_terminated_length": 545.4954833984375, "completions/min_length": 151.0, "completions/min_terminated_length": 151.0, "epoch": 14.904956268221575, "grad_norm": 0.319247305393219, "learning_rate": 1e-06, "loss": -0.0554, "num_tokens": 864687792.0, "reward": 0.6662946939468384, "reward_std": 0.12433795630931854, "rewards/verify_math_reward/mean": 0.6662946343421936, "rewards/verify_math_reward/std": 0.47179922461509705, "step": 1595 }, { "clip_ratio/high_max": 0.0016947853328019846, "clip_ratio/high_mean": 0.0006471200695159496, "clip_ratio/low_mean": 0.00041615926420490723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010632793600962032, "completions/clipped_ratio": 0.1104910714285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3002.0, "completions/mean_length": 967.8850708007812, "completions/mean_terminated_length": 579.32373046875, "completions/min_length": 154.0, "completions/min_terminated_length": 154.0, "epoch": 14.914285714285715, "grad_norm": 0.3197633624076843, "learning_rate": 1e-06, "loss": -0.0531, "num_tokens": 865235209.0, "reward": 0.625, "reward_std": 0.15398269891738892, "rewards/verify_math_reward/mean": 0.625, "rewards/verify_math_reward/std": 0.48439329862594604, "step": 1596 }, { "clip_ratio/high_max": 0.00191968068247661, "clip_ratio/high_mean": 0.0006238934201974189, "clip_ratio/low_mean": 0.00027419712159826304, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008980905367934611, "completions/clipped_ratio": 0.1004464285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 2439.0, "completions/mean_length": 859.529052734375, "completions/mean_terminated_length": 498.136474609375, "completions/min_length": 153.0, "completions/min_terminated_length": 153.0, "epoch": 14.923615160349854, "grad_norm": 0.23354937136173248, "learning_rate": 1e-06, "loss": -0.0534, "num_tokens": 865718387.0, "reward": 0.7120535969734192, "reward_std": 0.11114512383937836, "rewards/verify_math_reward/mean": 0.7120535969734192, "rewards/verify_math_reward/std": 0.4530589282512665, "step": 1597 }, { "clip_ratio/high_max": 0.0017583142580406275, "clip_ratio/high_mean": 0.0007182293265941553, "clip_ratio/low_mean": 0.0003164928714340931, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010347222105338005, "completions/clipped_ratio": 0.1328125, "completions/max_length": 4096.0, "completions/max_terminated_length": 2242.0, "completions/mean_length": 1035.08154296875, "completions/mean_terminated_length": 566.2921752929688, "completions/min_length": 133.0, "completions/min_terminated_length": 133.0, "epoch": 14.932944606413994, "grad_norm": 0.2821706533432007, "learning_rate": 1e-06, "loss": -0.061, "num_tokens": 866242380.0, "reward": 0.6540178656578064, "reward_std": 0.12148292362689972, "rewards/verify_math_reward/mean": 0.6540178656578064, "rewards/verify_math_reward/std": 0.4759531021118164, "step": 1598 }, { "clip_ratio/high_max": 0.0017521736481285188, "clip_ratio/high_mean": 0.0005596852179223788, "clip_ratio/low_mean": 0.00033856697450573847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00089825220311468, "completions/clipped_ratio": 0.1618303571428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 3748.0, "completions/mean_length": 1095.7020263671875, "completions/mean_terminated_length": 516.416748046875, "completions/min_length": 131.0, "completions/min_terminated_length": 131.0, "epoch": 14.942274052478133, "grad_norm": 1.2675962448120117, "learning_rate": 1e-06, "loss": -0.0356, "num_tokens": 866722681.0, "reward": 0.6339285969734192, "reward_std": 0.11592016369104385, "rewards/verify_math_reward/mean": 0.6339285969734192, "rewards/verify_math_reward/std": 0.48199835419654846, "step": 1599 }, { "clip_ratio/high_max": 0.0015007805759523762, "clip_ratio/high_mean": 0.0005371225406634039, "clip_ratio/low_mean": 0.00045821217690900085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009953347180271521, "completions/clipped_ratio": 0.1506696428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3126.0, "completions/mean_length": 1096.5570068359375, "completions/mean_terminated_length": 564.4612426757812, "completions/min_length": 180.0, "completions/min_terminated_length": 180.0, "epoch": 14.951603498542275, "grad_norm": 0.36327141523361206, "learning_rate": 1e-06, "loss": -0.0548, "num_tokens": 867252636.0, "reward": 0.5613839626312256, "reward_std": 0.13583439588546753, "rewards/verify_math_reward/mean": 0.5613839030265808, "rewards/verify_math_reward/std": 0.496494859457016, "step": 1600 }, { "epoch": 14.951603498542275, "step": 1600, "total_flos": 0.0, "train_loss": 21.70611201519991, "train_runtime": 127568.9117, "train_samples_per_second": 11.238, "train_steps_per_second": 0.013 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 867252636, "num_train_epochs": 15, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }